|
@@ -0,0 +1,171 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+
|
|
|
+from content_spider.baseSpider import baseSpider
|
|
|
+from content_spider.baseSpider import baseUpdateSpider
|
|
|
+from content_spider.baseSpider import fixChapterSpider
|
|
|
+from content_spider.baseSpider import baseUpdateBookStatusSpider
|
|
|
+from content_spider.Util import md5
|
|
|
+import time
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
# Identity of this content source. The process and spider classes in this
# module read these module-level values when their class bodies execute.
name = 'wandu'
allowed_domains = ['api.wandu.cn']
source = 'zy_wandu'
source_name = '万读'
source_id = 44

# NOTE(review): the partner appid is committed in source; consider loading it
# from settings or the environment instead.
appid = 'wdca9d79a7b66c39eb'

# URL template for the open API: ``{}`` is filled with the endpoint name via
# ``base_url.format(...)``; extra query parameters are appended with ``&``.
base_url = 'https://api.wandu.cn/open/search/{}?appid=' + appid
|
|
|
+
|
|
|
+
|
|
|
# Mapping from upstream tags to local categories.
#   id / name                   - compared against the API's ``tag_id`` by
#                                 get_category(); ``name`` is informational.
#   category_id / category_name - emitted by book_info_result() as
#                                 ``category_id`` / ``category``.
#   channel_id                  - not read anywhere in this module
#                                 (presumably the local channel; verify).
# The first entry doubles as the default for unknown tag ids.
category = [
    {'id': 1, 'name': '都市言情', 'channel_id': 2, 'category_id': 88, 'category_name': '豪门总裁'},
    {'id': 2, 'name': '古代言情', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 4, 'name': '青春校园', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 5, 'name': '总裁豪门', 'channel_id': 2, 'category_id': 88, 'category_name': '豪门总裁'},
    {'id': 15, 'name': '灵异惊悚', 'channel_id': 2, 'category_id': 114, 'category_name': '恐怖惊悚'},
    {'id': 29, 'name': '现代言情', 'channel_id': 2, 'category_id': 88, 'category_name': '豪门总裁'},
    {'id': 30, 'name': '幻想言情', 'channel_id': 2, 'category_id': 110, 'category_name': '上古蛮荒'},
    {'id': 31, 'name': '魔幻异界', 'channel_id': 2, 'category_id': 110, 'category_name': '上古蛮荒'},
    {'id': 32, 'name': '仙侠情缘', 'channel_id': 2, 'category_id': 97, 'category_name': '古典仙侠'},
    {'id': 33, 'name': '推理悬疑', 'channel_id': 2, 'category_id': 113, 'category_name': '悬疑探险'},
    {'id': 34, 'name': '次元同人', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 36, 'name': '游戏竞技', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
    {'id': 37, 'name': '短篇其他', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 38, 'name': '穿越重生', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 39, 'name': '女尊女强', 'channel_id': 2, 'category_id': 123, 'category_name': '女尊王朝'},
    {'id': 40, 'name': '轻松爆笑', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 42, 'name': '纯爱', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 6, 'name': '悬疑惊悚', 'channel_id': 1, 'category_id': 81, 'category_name': '灵异恐怖'},
    {'id': 7, 'name': '婚恋生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 9, 'name': '热血青春', 'channel_id': 1, 'category_id': 94, 'category_name': '青春爱情'},
    {'id': 10, 'name': '游戏竞技', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 11, 'name': '都市情感', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 12, 'name': '官场仕途', 'channel_id': 1, 'category_id': 55, 'category_name': '官场沉浮'},
    {'id': 13, 'name': '武侠仙侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 14, 'name': '都市生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 16, 'name': '乡村生活', 'channel_id': 1, 'category_id': 57, 'category_name': '乡土风情'},
    {'id': 17, 'name': '修真玄幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 18, 'name': '军事历史', 'channel_id': 1, 'category_id': 14, 'category_name': '历史穿越'},
    {'id': 19, 'name': '都市异能', 'channel_id': 1, 'category_id': 68, 'category_name': '现代修真'},
    {'id': 20, 'name': '玄幻奇幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 24, 'name': '科幻未来', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 25, 'name': '都市 娱乐', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 26, 'name': '次元同人', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 27, 'name': '短篇其他', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 28, 'name': '现实反思', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 41, 'name': '轻松爆笑', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
]


def get_category(tag_id):
    """Return the ``category`` entry whose ``id`` equals ``tag_id``.

    Args:
        tag_id: Upstream tag id; anything ``int()`` accepts (int or numeric
            string).

    Returns:
        The matching dict from ``category``. If ``tag_id`` cannot be
        converted to an int, or no entry has that id, the first entry of
        ``category`` is returned as the default.
    """
    default = category[0]
    try:
        # Convert once instead of on every loop iteration; a missing or
        # malformed tag id falls back to the default like an unknown id does.
        wanted = int(tag_id)
    except (TypeError, ValueError):
        return default
    return next((item for item in category if item['id'] == wanted), default)
|
|
|
+
|
|
|
+
|
|
|
class wanduProcess(object):
    """URL builders and JSON response parsers for the wandu open API.

    The spider classes in this module combine this class with a base spider
    from ``content_spider.baseSpider``; presumably those base classes call the
    methods below as hooks (verify against the base classes).
    """

    # Mirror the module-level source identity on the class.
    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id

    def get_start_url(self):
        """Return the URL of the book-list endpoint."""
        # https://api.wandu.cn/open/search/getwandubooklist?appid=xxx
        return base_url.format('getwandubooklist')

    def bid_list_result(self, response):
        """Extract book ids from a book-list response.

        Args:
            response: Object exposing the JSON body as ``response.text``.

        Returns:
            A list of ``{'id': novel_id}`` dicts, or ``[]`` when the payload
            is ``null`` or has no ``data``.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [{'id': item['novel_id']} for item in result['data']]

    def get_book_info_url(self, bid):
        """Return the book-info URL for book id ``bid``."""
        # https://api.wandu.cn/open/search/getwandubookinfo?appid=xxx&bookid=xx
        return base_url.format('getwandubookinfo') + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Convert a book-info response into the local book dict.

        Args:
            response: Object exposing the JSON body as ``response.text``.

        Returns:
            A dict with keys ``bid``, ``name``, ``author``, ``intro``,
            ``cover``, ``keyword``, ``status``, ``category``, ``category_id``
            and ``channel``.

        Raises:
            KeyError, TypeError: If ``data`` or one of its expected fields is
                missing from the payload.
        """
        result = json.loads(response.text)
        result = result['data']
        category_info = get_category(result['tag_id'])
        return {
            'bid': result['novel_id'],
            'name': result['novel_name'],
            'author': result['author_name'],
            'intro': result['summary'],
            'cover': result['pic'],
            'keyword': '',
            'status': result['is_done'],
            'category': category_info['category_name'],
            'category_id': category_info['category_id'],
            # NOTE(review): taken from the API payload, not from the mapped
            # category's own 'channel_id' - confirm this is intended.
            'channel': result['channel_id'],
        }

    def get_chapter_list_url(self, bid):
        """Return the chapter-list URL for book id ``bid``."""
        # https://api.wandu.cn/open/search/getwanduchapterlist?appid=xxx&bookid=xx
        # NOTE(review): per_num is fixed at 10000; presumably a page size, so
        # longer books may be truncated - confirm against the API.
        return base_url.format('getwanduchapterlist') + '&bookid={}&per_num=10000'.format(bid)

    def chapter_list_result(self, response):
        """Convert a chapter-list response into local chapter dicts.

        Args:
            response: Object exposing the JSON body as ``response.text``.

        Returns:
            One dict per chapter with keys ``source_chapter_id``, ``name``,
            ``sequence``, ``is_vip``, ``size`` and ``recent_update_at``;
            ``[]`` when the payload is ``null``, has no ``data``, or ``data``
            has no ``lists``.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []

        # A missing or null 'lists' is treated like a missing 'data'.
        chapters = result['data'].get('lists') or []
        return [
            {
                'source_chapter_id': chapter_item['paragraph_id'],
                'name': chapter_item['title'],
                'sequence': chapter_item['order_num'],
                'is_vip': chapter_item['is_vip'],
                'size': chapter_item['word_num'],
                'recent_update_at': chapter_item['update_time'],
            }
            for chapter_item in chapters
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the chapter-content URL for book ``bid``, chapter ``cid``."""
        # https://api.wandu.cn/open/search/getwanduchaptercontent?appid=xxx&bookid=xx&chapterid=xx
        return base_url.format('getwanduchaptercontent') + '&bookid={}&chapterid={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Extract the chapter text from a chapter-content response.

        Args:
            response: Object exposing the JSON body as ``response.text``.

        Returns:
            ``{'content': text}``; the text is ``''`` when the payload is
            ``null`` or has no ``data``.

        Raises:
            KeyError: If ``data`` is present but has no ``content`` field.
        """
        result = json.loads(response.text)
        # Guard 'data' the same way the list parsers do, instead of crashing
        # on a payload whose 'data' is null.
        data = None if result is None else result.get('data')
        if data is None:
            return {'content': ''}

        return {'content': data['content']}
|
|
|
+
|
|
|
+
|
|
|
class wanduSpider(wanduProcess, baseSpider):
    """Spider named ``wandu``: the wandu API hooks combined with ``baseSpider``."""

    name = name

    # Per-spider settings (presumably consumed by Scrapy through the base
    # class - verify). The dict is built once, when this class body executes,
    # so the date in the log file name is the import date.
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
class wanduUpdateSpider(wanduProcess, baseUpdateSpider):
    """Spider named ``wanduupdate``: the wandu API hooks on ``baseUpdateSpider``."""

    # The right-hand side reads the module-level ``name``; from here on,
    # ``name`` inside this class body is the suffixed class attribute.
    name = name + "update"

    # Per-spider settings (presumably consumed by Scrapy through the base
    # class - verify). Built once when this class body executes; LOG_FILE
    # uses the suffixed spider name assigned above.
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class wanduFixSpider(wanduProcess, fixChapterSpider):
    """Spider named ``wandufix``: the wandu API hooks on ``fixChapterSpider``."""

    # The right-hand side reads the module-level ``name``; from here on,
    # ``name`` inside this class body is the suffixed class attribute.
    name = name + 'fix'

    # Per-spider settings (presumably consumed by Scrapy through the base
    # class - verify). Built once when this class body executes; LOG_FILE
    # uses the suffixed spider name assigned above.
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
class wanduBookInfoSpider(wanduProcess, baseUpdateBookStatusSpider):
    """Spider named ``wandubookinfo``: the wandu API hooks on ``baseUpdateBookStatusSpider``."""

    # The right-hand side reads the module-level ``name``; from here on,
    # ``name`` inside this class body is the suffixed class attribute.
    name = name + "bookinfo"

    # Per-spider settings (presumably consumed by Scrapy through the base
    # class - verify). Built once when this class body executes; LOG_FILE
    # uses the suffixed spider name assigned above.
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
    }
|