|
@@ -0,0 +1,160 @@
|
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
|
+
|
|
|
|
+from content_spider.baseSpider import baseSpider
|
|
|
|
+from content_spider.baseSpider import baseUpdateSpider
|
|
|
|
+from content_spider.baseSpider import fixChapterSpider
|
|
|
|
+from content_spider.baseSpider import baseUpdateBookStatusSpider
|
|
|
|
+from content_spider.Util import md5
|
|
|
|
+import time
|
|
|
|
+import json
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Spider name and the hosts the spiders are allowed to request.
# NOTE(review): the name is 'wandu' although every other identifier in this
# module refers to "xiwen" -- confirm this is intentional.
name = 'wandu'
allowed_domains = ['xyopen.8kana.com']

# Identifiers of the content source handled by this module.
source, source_name, source_id = 'zy_xiwen', '溪文', 45

# API key and secret that are fed to md5() to sign every request.
# NOTE(review): credentials are hard-coded here; consider moving them to
# configuration.
key = 'xiyuexsw'
secret = '1234567890'

# URL template with two remaining placeholders: the endpoint name and the
# request signature. The API key is already baked in.
base_url = 'http://xyopen.8kana.com/channel/{{}}?key={}&sign={{}}'.format(key)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Maps the provider's class id ('id' / 'name') to the local channel and
# category ('channel_id', 'category_id', 'category_name').
#
# The first entry used to carry id 102, which duplicated the '玄幻' entry
# further down and made that entry unreachable. Every other channel-2 id is in
# the 2xx range (202-211), so the first entry is 201.
# NOTE(review): confirm 201 against the provider's class-id list.
category = [
    {'id': 201, 'name': '豪门总裁', 'channel_id': 2, 'category_id': 88, 'category_name': '豪门总裁'},
    {'id': 202, 'name': '现代重生', 'channel_id': 2, 'category_id': 118, 'category_name': '都市异能'},
    {'id': 203, 'name': '古代言情', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
    {'id': 204, 'name': '穿越重生', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 205, 'name': '种田言情', 'channel_id': 2, 'category_id': 84, 'category_name': '经商种田'},
    {'id': 206, 'name': '婚恋生活', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 207, 'name': '奇幻仙侠', 'channel_id': 2, 'category_id': 97, 'category_name': '古典仙侠'},
    {'id': 208, 'name': '快穿言情', 'channel_id': 2, 'category_id': 117, 'category_name': '娱乐明星'},
    {'id': 209, 'name': '青春校园', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 210, 'name': '悬疑灵异', 'channel_id': 2, 'category_id': 113, 'category_name': '悬疑探险'},
    {'id': 211, 'name': '架空历史', 'channel_id': 2, 'category_id': 121, 'category_name': '古典架空'},
    {'id': 101, 'name': '都市', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 102, 'name': '玄幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 103, 'name': '历史', 'channel_id': 1, 'category_id': 14, 'category_name': '历史穿越'},
    {'id': 104, 'name': '武侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 105, 'name': '科幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 106, 'name': '悬疑', 'channel_id': 1, 'category_id': 81, 'category_name': '灵异恐怖'},
    {'id': 107, 'name': '都市生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 108, 'name': '体育', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 109, 'name': '游戏动漫', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 110, 'name': '影视小说', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 111, 'name': '仙侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
]


def get_category(tag_id):
    """Return the ``category`` entry whose ``'id'`` equals ``tag_id``.

    :param tag_id: class id as an int or a numeric string.
    :return: the matching dict from ``category``. The first entry of
        ``category`` is returned when no entry matches or when ``tag_id``
        cannot be converted to an int (``None``, non-numeric text).
    """
    default = category[0]
    try:
        # Convert once up front instead of on every loop iteration.
        wanted = int(tag_id)
    except (TypeError, ValueError):
        # Treat an unparsable id like an unknown id.
        return default
    for item in category:
        if item['id'] == wanted:
            return item
    return default
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class xiwenProcess(object):
    """URL builders and JSON response parsers shared by the spiders below.

    Every URL is produced from ``base_url`` and signed with ``md5`` over the
    API key, the secret and the request's ids. Every parser reads its payload
    from the ``'Data'`` key of the decoded JSON body.
    """
    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id

    def get_start_url(self):
        """Return the signed ``books`` URL without a book id."""
        sign = md5(key + secret)
        return base_url.format('books', sign)

    def bid_list_result(self, response):
        """Parse the book list response.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: list of ``{'id': ...}`` dicts, one per entry under ``'Data'``;
            an empty list when the body is ``null`` or ``'Data'`` is absent.
        """
        result = json.loads(response.text)
        if result is None or result.get('Data') is None:
            return []
        return [{'id': item['id']} for item in result['Data']]

    def get_book_info_url(self, bid):
        """Return the signed ``books`` URL for the book ``bid``."""
        sign = md5(key + secret + str(bid))
        return base_url.format('books', sign) + '&book_id={}'.format(bid)

    def book_info_result(self, response):
        """Parse a book detail response into the book info dict.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: dict with the keys ``bid``, ``name``, ``author``, ``intro``,
            ``cover``, ``keyword``, ``status``, ``category``, ``category_id``
            and ``channel``.
        :raises KeyError: when ``'Data'`` or one of its fields is missing.
        """
        result = json.loads(response.text)
        result = result['Data']
        category_info = get_category(result['class_id'])
        return {
            'bid': result['id'],
            'name': result['name'],
            'author': result['author'],
            'intro': result['intro'],
            'cover': result['cover'],
            'keyword': result['keywords'],
            # status is 1 when is_serial == 0, otherwise 0.
            'status': 1 if int(result['is_serial']) == 0 else 0,
            'category': category_info['category_name'],
            'category_id': category_info['category_id'],
            'channel': category_info['channel_id'],
        }

    def get_chapter_list_url(self, bid):
        """Return the signed ``chapters`` URL for the book ``bid``."""
        sign = md5(key + secret + str(bid))
        return base_url.format('chapters', sign) + '&book_id={}'.format(bid)

    def chapter_list_result(self, response):
        """Parse the chapter list response.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: list of chapter dicts in response order, with ``sequence``
            counting from 1; an empty list when the body is ``null`` or
            ``'Data'`` is absent.
        """
        result = json.loads(response.text)
        # The guard must test the same 'Data' key that is read below. It used
        # to test 'data', so every well-formed response produced [].
        if result is None or result.get('Data') is None:
            return []

        return [
            {
                'source_chapter_id': chapter_item['id'],
                'name': chapter_item['title'],
                'sequence': sequence,
                'is_vip': 0,
                'size': 0,
                'recent_update_at': chapter_item['update_time'],
            }
            for sequence, chapter_item in enumerate(result['Data'], 1)
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the signed ``chapters`` URL for chapter ``cid`` of ``bid``."""
        sign = md5(key + secret + str(bid) + str(cid))
        return base_url.format('chapters', sign) + '&book_id={}&chapter_id={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Parse a chapter content response.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: dict with ``content``, ``is_vip`` and ``size`` (length of the
            content); ``{'content': ''}`` when the body is ``null`` or
            ``'Data'`` is absent.
        """
        result = json.loads(response.text)
        data = result.get('Data') if result is not None else None
        if data is None:
            # Same fallback as for a null body, instead of failing on the
            # missing key.
            return {'content': ''}

        content = data['content']
        return {
            'content': content,
            'is_vip': data['is_vip'],
            'size': len(content),
        }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class xiwenSpider(xiwenProcess, baseSpider):
    """Spider that combines ``xiwenProcess`` with ``baseSpider``."""
    name = name

    # The log file name is the spider name followed by the local date at the
    # time this class body is executed.
    custom_settings = dict([
        ('DOWNLOAD_DELAY', 0.1),
        ('SOURCE', source),
        ('LOG_FILE', 'content_spider/log/{}{}.log'.format(
            name, time.strftime("%Y-%m-%d", time.localtime()))),
    ])
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class xiwenUpdateSpider(xiwenProcess, baseUpdateSpider):
    """Spider that combines ``xiwenProcess`` with ``baseUpdateSpider``."""
    # Module-level spider name with the "update" suffix appended.
    name = '{}{}'.format(name, "update")

    # The log file name uses the suffixed name bound just above, followed by
    # the local date at the time this class body is executed.
    custom_settings = dict([
        ('DOWNLOAD_DELAY', 0.1),
        ('SOURCE', source),
        ('LOG_FILE', 'content_spider/log/{}{}.log'.format(
            name, time.strftime("%Y-%m-%d", time.localtime()))),
    ])
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class xiwenFixSpider(xiwenProcess, fixChapterSpider):
    """Spider that combines ``xiwenProcess`` with ``fixChapterSpider``."""
    # Module-level spider name with the 'fix' suffix appended.
    name = '{}{}'.format(name, 'fix')

    # The log file name uses the suffixed name bound just above, followed by
    # the local date at the time this class body is executed.
    custom_settings = dict([
        ('DOWNLOAD_DELAY', 0.1),
        ('SOURCE', source),
        ('LOG_FILE', 'content_spider/log/{}{}.log'.format(
            name, time.strftime("%Y-%m-%d", time.localtime()))),
    ])
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class xiwenBookInfoSpider(xiwenProcess, baseUpdateBookStatusSpider):
    """Spider that combines ``xiwenProcess`` with ``baseUpdateBookStatusSpider``."""
    # Module-level spider name with the "bookinfo" suffix appended.
    name = '{}{}'.format(name, "bookinfo")

    # The log file name uses the suffixed name bound just above, followed by
    # the local date at the time this class body is executed.
    custom_settings = dict([
        ('DOWNLOAD_DELAY', 0.1),
        ('SOURCE', source),
        ('LOG_FILE', 'content_spider/log/{}{}.log'.format(
            name, time.strftime("%Y-%m-%d", time.localtime()))),
    ])
|