I am looking for a way to store the spider's output in a Python variable, instead of saving it
to a JSON file and reading that file back into the program.
import io
import json
import os

import scrapy
from scrapy.crawler import CrawlerProcess
class TestSpider(scrapy.Spider):
    """Spider that scrapes the localized slogan from the Wikipedia portal page."""

    # Identifier Scrapy uses to refer to this spider.
    name = 'test'
    # The crawl begins (and, since parse() follows no links, ends) here.
    start_urls = ['https://www.wikipedia.org']

    def parse(self, response):
        """Yield a single item holding the slogan text found in *response*.

        The item is a dict with one key, ``'text'``, set to the first text
        node matched by the ``.jsl10n.localized-slogan`` CSS selector
        (``extract_first()`` gives ``None`` when nothing matches).
        """
        yield {
            'text' : response.css(".jsl10n.localized-slogan::text").extract_first()
        }
if __name__ == "__main__":
    # Delete any result.json left behind by an earlier run. A missing file
    # (e.g. on the very first run) is fine, so that case is ignored instead
    # of aborting the script before the crawl starts.
    try:
        os.remove('result.json')
    except FileNotFoundError:
        pass

    # Configure an in-process crawler that exports every scraped item to
    # result.json as JSON.
    # NOTE(review): recent Scrapy releases document a single FEEDS setting in
    # place of FEED_FORMAT/FEED_URI -- confirm against the installed version
    # before switching.
    process = CrawlerProcess({
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        'FEED_FORMAT': 'json',
        'FEED_URI': 'result.json'
    })
    process.crawl(TestSpider)
    # Blocks until the crawl has finished.
    process.start()
I want to avoid the step below and read the value directly, instead of saving it to disk first:
# Load the exported feed back from disk. The file is closed as soon as the
# JSON has been parsed; only the in-memory data is used afterwards.
with io.open('result.json', encoding='utf-8') as json_data:
    d = json.load(json_data)

# The feed is indexed as a list of items; take the 'text' field of the first.
text = d[0]['text']