
I'm trying to implement a caching mechanism in Scrapy using my own downloader middleware. My implementation is as follows:

import pickle

from scrapy import signals


class DevCacheMiddleware(object):

    def __init__(self, crawler):
        self.crawler = crawler
        # Load any previously cached responses; fall back to an empty
        # cache if the file is missing or unreadable.
        try:
            with open('dev_cache.pkl', 'rb') as dcfile:
                self.dev_cache = pickle.load(dcfile)
        except (OSError, EOFError, pickle.UnpicklingError):
            self.dev_cache = {}

    @classmethod
    def from_crawler(cls, crawler):
        o = cls(crawler)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def process_request(self, request, spider):
        return None

    def process_response(self, request, response, spider):
        if response.status == 200:
            if spider.name in self.dev_cache and request.url in self.dev_cache[spider.name]:
                print("found cached response for <" + request.url + ">")
                return self.dev_cache[spider.name][request.url]
            else:
                self.cache_response(request, response, spider)
        return response

    def cache_response(self, request, response, spider):
        # One cache dict per spider, keyed by URL.
        if spider.name not in self.dev_cache:
            self.dev_cache[spider.name] = {}
        self.dev_cache[spider.name][request.url] = response

    def spider_closed(self, spider):
        # Persist the cache when the spider finishes.
        with open('dev_cache.pkl', 'wb') as dcfile:
            pickle.dump(self.dev_cache, dcfile)
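
For context, the middleware is enabled via DOWNLOADER_MIDDLEWARES in settings.py; the module path myproject.middlewares below is an assumption about my project layout:

# settings.py (assumed module path; adjust to the actual project)
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.DevCacheMiddleware': 543,
}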

When I run the spider, it raises the exception TypeError: can't pickle Selector objects at the pickle.dump call, because the cached Response objects hold a lazily built Selector. Please help me serialize the cached responses.
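
One workaround I'm considering (a minimal, untested sketch; the helper names response_to_dict and response_from_dict are mine) is to cache only the picklable fields of the response and rebuild an HtmlResponse on a cache hit, so the unpicklable Selector is never stored:

from scrapy.http import HtmlResponse

def response_to_dict(response):
    # Keep only plain-data fields; the lazily created Selector is dropped.
    return {
        'url': response.url,
        'status': response.status,
        'headers': dict(response.headers),
        'body': response.body,
    }

def response_from_dict(data):
    # Rebuild a fresh response; its Selector is recreated on first use.
    return HtmlResponse(
        url=data['url'],
        status=data['status'],
        headers=data['headers'],
        body=data['body'],
    )

With this scheme, cache_response would store response_to_dict(response), and the hit branch in process_response would return response_from_dict(self.dev_cache[spider.name][request.url]). Is this the right direction, or is there a cleaner way?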
