Changes to be committed: - kleinere Korrekturen/Verbesserungen modified: homepage/redesign2018/markdownExperiment/Makefile modified: homepage/redesign2018/markdownExperiment/src/jsonSd/cwsvJudo.json - Neuer Ansatz für den Wettkampfsammler new file: wkOrg/src/wkScraper/scrapyDocAuthorSpider.py new file: wkOrg/src/wkScraper/scrapyDocQuoteSpider.py new file: wkOrg/src/wkScraper/scrapyJvsKalender.py
38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
import scrapy
|
|
|
|
|
|
class QuotesSpider(scrapy.Spider):
    """Spider that lists events from the judoverbandsachsen.de calendar.

    ``parse`` walks the calendar listing, emits one item per ``div.posts``
    entry (date, name, url, location) and schedules a follow-up request to
    the entry's detail page, which is handled by ``parseJvsKalenderEvent``.
    """

    name = "quotes"
    start_urls = [
        # Unfiltered calendar: 'https://judoverbandsachsen.de/kalender/'
        # The active URL filters via query parameters (term_id 48 and the
        # "altersklassen" values m-U09 / w-U09).
        "https://judoverbandsachsen.de/kalender/?term_id%5B%5D=48&altersklassen%5B%5D=m-U09&altersklassen%5B%5D=w-U09",
    ]

    @staticmethod
    def _squash(text):
        """Collapse all whitespace runs in *text* to single spaces.

        ``None`` (what ``extract_first()`` returns when a selector matches
        nothing) is treated as an empty string instead of raising.
        """
        return " ".join((text or "").split())

    def eventExtract(self, url, callbackHandler):
        """Yield a request for *url* whose response goes to *callbackHandler*.

        A spider has no ``follow`` method (that lives on the response
        object), so the request is built explicitly.  Because no response is
        available here to resolve relative links, *url* must be absolute.
        """
        yield scrapy.Request(url, callback=callbackHandler)

    def parseJvsKalenderEvent(self, response):
        """Handle an event detail page.

        Currently a placeholder: it yields a single item whose ``'stuff'``
        value is the constant produced by the inner helper; *response* is
        not inspected yet.
        """
        def extractingJvsEvent(query):
            # Placeholder extractor; *query* is ignored for now.
            return "someStuff"

        yield {
            'stuff': extractingJvsEvent("someSelector"),
        }

    def parse(self, response):
        """Yield one item per calendar entry plus a request for its detail page.

        Each item carries ``date``, ``name``, ``url`` and ``ort`` with
        whitespace normalised.  Fields whose selector matches nothing become
        empty strings (``url`` stays ``None``), so one incomplete row does
        not abort the whole crawl.
        """
        for post in response.css('div.posts'):
            url = post.css('div.col-4>a::attr(href)').extract_first()

            yield {
                'date': self._squash(post.css('div.col-2>time::text').extract_first()),
                'name': self._squash(post.css('div.col-4>a::text').extract_first()),
                'url': url,
                'ort': self._squash(post.css('div.col-3::text').extract_first()),
            }

            # The detail page is fetched asynchronously; its data arrives in
            # parseJvsKalenderEvent, not inline here.  Skip entries without
            # a link, since response.follow() rejects a missing URL.
            if url:
                yield response.follow(url, self.parseJvsKalenderEvent)
|