|
@@ -9,14 +9,9 @@
|
|
|
|
|
|
import os
|
|
|
import re
|
|
|
-import redis
|
|
|
from content_spider.mysqlHelper import MysqlHelper
|
|
|
from content_spider.Util import my_log
|
|
|
|
|
|
-redis_crawl_flag_key = 'book:crawl:stats'
|
|
|
-redis_update_flag_key = 'book:update:stats'
|
|
|
-redis_fix_flag_key = 'book:fix:stats'
|
|
|
-
|
|
|
|
|
|
def formatcontent(content):
|
|
|
content = content.replace(' ', '')
|
|
@@ -69,14 +64,12 @@ class ChapterItemPipeline:
|
|
|
def close_spider(self, spider):
|
|
|
spider_type = self.__stats.get_value('spider_type')
|
|
|
if spider_type == 'update':
|
|
|
- self.__redis_conn.hset(redis_update_flag_key, spider.name, 0)
|
|
|
my_log(spider.name, 'update end ....')
|
|
|
book_list = self.__stats.get_value('bid_list')
|
|
|
if book_list is not None:
|
|
|
for book in book_list:
|
|
|
spider.mysqlHelper.after_spider(book['bid'],book['start_sequence'])
|
|
|
if spider_type == 'add':
|
|
|
- self.__redis_conn.hset(redis_crawl_flag_key, spider.name, 0)
|
|
|
my_log(spider.name, 'crawl end ....')
|
|
|
bid_list = self.__stats.get_value('bid_list')
|
|
|
if bid_list is not None:
|