|
@@ -1,6 +1,7 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
from content_spider.baseSpider import baseSpider
|
|
|
+from content_spider.baseSpider import fixBookInfoSpider
|
|
|
from content_spider.Util import get_category_by_name
|
|
|
import json
|
|
|
import time
|
|
@@ -135,10 +136,13 @@ class BookSpider(baseSpider):
|
|
|
return None
|
|
|
result = result['data']
|
|
|
category_info = get_category_by_name(result['category'])
|
|
|
+ category_id = 0
|
|
|
+ if category_info is not None:
|
|
|
+ category_id = category_info['id']
|
|
|
return {
|
|
|
'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
|
|
|
'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
|
|
|
- 'status': result['state'], 'category': result['category'],'category_id':category_info['id'],
|
|
|
+ 'status': result['state'], 'category': result['category'],'category_id':category_id,
|
|
|
'channel': result['channelId']
|
|
|
}
|
|
|
|
|
@@ -174,3 +178,48 @@ class BookSpider(baseSpider):
|
|
|
'size': len(result['data']['content'])
|
|
|
}
|
|
|
|
|
|
+
|
|
|
class BookInfoFixSpider(fixBookInfoSpider):
    """Spider that re-fetches book metadata from the 'zy_kanshu' source.

    This class only supplies URL builders and JSON response parsers. When
    and how they are called is decided by ``fixBookInfoSpider``, which is
    not visible here -- presumably it requests the start URL, feeds the
    response to ``bid_list_result`` and then fetches each book's detail
    page; confirm against the base class.
    """

    name = 'kanshubookinfofix'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the endpoint name
    # ('bookLists' or 'BookDetail') by the URL builders below.
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'

    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # Evaluated once when the class body runs, so the date in the file
        # name is the date the module was loaded.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_start_url(self):
        """Return the URL of the book-list endpoint."""
        return self.base_url.format('bookLists')

    def bid_list_result(self, response):
        """Parse the book-list response into a list of ``{'id': ...}`` dicts.

        Args:
            response: object whose ``text`` attribute holds a JSON document
                with a ``data`` list of items that each have an ``id`` key.

        Returns:
            One ``{'id': <book id>}`` dict per listed item, or an empty list
            when the payload is null or carries no ``data``.
        """
        result = json.loads(response.text)
        # Same guard as book_info_result: a missing or null 'data' field
        # means "no books", not a KeyError/TypeError.
        if result is None or result.get('data') is None:
            return []
        return [{'id': item['id']} for item in result['data']]

    def get_book_info_url(self, bid):
        """Return the book-detail endpoint URL for book id ``bid``."""
        return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Parse a book-detail response into a flat book-info dict.

        Args:
            response: object whose ``text`` attribute holds a JSON document
                with the book fields under its ``data`` key.

        Returns:
            A dict with keys ``bid``, ``name``, ``author``, ``intro``,
            ``cover``, ``keyword``, ``status``, ``category``,
            ``category_id`` and ``channel``, or ``None`` when the payload
            is null or has no ``data``.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return None
        result = result['data']
        category_info = get_category_by_name(result['category'])
        # Fall back to 0 when the category name has no known mapping.
        category_id = category_info['id'] if category_info is not None else 0
        return {
            'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
            'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
            'status': result['state'], 'category': result['category'], 'category_id': category_id,
            'channel': result['channelId']
        }
|