zhaoyang 1 年間 前
コミット
c67172ac3f

+ 0 - 0
content_spider/spiders/xiwen/__init__.py


+ 160 - 0
content_spider/spiders/xiwen/xiwen.py

@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+
+from content_spider.baseSpider import baseSpider
+from content_spider.baseSpider import baseUpdateSpider
+from content_spider.baseSpider import fixChapterSpider
+from content_spider.baseSpider import baseUpdateBookStatusSpider
+from content_spider.Util import md5
+import time
+import json
+
+
+# Module-level configuration shared by the spider classes in this file.
+# NOTE(review): 'wandu' does not match this package (xiwen) or `source`; it looks
+# copied from another spider -- confirm the name is intended and unique.
+name = 'wandu'
+allowed_domains = ['xyopen.8kana.com']
+source = 'zy_xiwen'
+source_name = '溪文'
+source_id = 45
+# API credentials: `key` is sent in the query string, while `secret` is only mixed
+# into the md5 request signature.
+# NOTE(review): credentials are hard-coded here and '1234567890' looks like a
+# placeholder -- confirm before deploying.
+key = 'xiyuexsw'
+secret = '1234567890'
+# URL template: the first {} is the endpoint name ('books' or 'chapters'), the
+# second {} is the request signature.
+base_url = 'http://xyopen.8kana.com/channel/{}?key=' + key + '&sign={}'
+
+
+# Lookup table used by get_category(): 'id' is compared with the book's class id,
+# and 'channel_id', 'category_id' and 'category_name' are copied into the parsed
+# book info. 'name' is presumably the source-side label -- TODO confirm.
+# NOTE(review): id 102 appears twice (the first entry and '玄幻'). get_category()
+# returns the first match, so the '玄幻' entry is never selected. The other
+# channel_id 2 entries use ids 202-211, so the first entry was presumably meant
+# to be 201 -- confirm against the API document.
+category = [{'id': 102, 'name': '豪门总裁', 'channel_id': 2, 'category_id': 88, 'category_name': '豪门总裁'},
+ {'id': 202, 'name': '现代重生', 'channel_id': 2, 'category_id': 118, 'category_name': '都市异能'},
+ {'id': 203, 'name': '古代言情', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
+ {'id': 204, 'name': '穿越重生', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 205, 'name': '种田言情', 'channel_id': 2, 'category_id': 84, 'category_name': '经商种田'},
+ {'id': 206, 'name': '婚恋生活', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 207, 'name': '奇幻仙侠', 'channel_id': 2, 'category_id': 97, 'category_name': '古典仙侠'},
+ {'id': 208, 'name': '快穿言情', 'channel_id': 2, 'category_id': 117, 'category_name': '娱乐明星'},
+ {'id': 209, 'name': '青春校园', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
+ {'id': 210, 'name': '悬疑灵异', 'channel_id': 2, 'category_id': 113, 'category_name': '悬疑探险'},
+ {'id': 211, 'name': '架空历史', 'channel_id': 2, 'category_id': 121, 'category_name': '古典架空'},
+ {'id': 101, 'name': '都市', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 102, 'name': '玄幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 103, 'name': '历史', 'channel_id': 1, 'category_id': 14, 'category_name': '历史穿越'},
+ {'id': 104, 'name': '武侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
+ {'id': 105, 'name': '科幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 106, 'name': '悬疑', 'channel_id': 1, 'category_id': 81, 'category_name': '灵异恐怖'},
+ {'id': 107, 'name': '都市生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 108, 'name': '体育', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
+ {'id': 109, 'name': '游戏动漫', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 110, 'name': '影视小说', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 111, 'name': '仙侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'}]
+
+
+def get_category(tag_id):
+    for item in category:
+        if int(tag_id) == item['id']:
+            return item
+    return category[0]
+
+
+class xiwenProcess(object):
+    name = name
+    allowed_domains = allowed_domains
+    source = source
+    source_name = source_name
+    source_id = source_id
+
+
+    def get_start_url(self):
+        sign = md5(key + secret)
+        return base_url.format('books',sign)
+
+    def bid_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None or result.get('Data') is None:
+            return []
+        result_list = []
+        for item in result['Data']:
+            result_list.append({'id': item['id']})
+        return result_list
+
+    def get_book_info_url(self, bid):
+        sign = md5(key + secret + str(bid))
+        return base_url.format('books',sign) + '&book_id={}'.format(bid)
+
+    def book_info_result(self, response):
+        result = json.loads(response.text)
+        result = result['Data']
+        category_info = get_category(result['class_id']);
+        return {
+            'bid': result['id'], 'name': result['name'], 'author': result['author'],
+            'intro': result['intro'], 'cover': result['cover'], 'keyword': result['keywords'],
+            'status':1 if int(result['is_serial']) == 0 else 0, 'category': category_info['category_name'],'category_id':category_info['category_id'],
+            'channel': category_info['channel_id']
+        }
+
+    def get_chapter_list_url(self, bid):
+        sign = md5(key + secret + str(bid))
+        return base_url.format('chapters',sign) + '&book_id={}'.format(bid)
+
+    def chapter_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return []
+
+        result_list = []
+        i = 0
+        for chapter_item in result['Data']:
+            i = i+1
+            result_list.append({
+                'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
+                'sequence': i, 'is_vip': 0,
+                'size': 0, 'recent_update_at': chapter_item['update_time']
+            })
+        return result_list
+
+    def get_chapter_content_url(self, bid, cid):
+        sign = md5(key + secret + str(bid) + str(cid))
+        return base_url.format('chapters',sign) + '&book_id={}&chapter_id={}'.format(bid, cid)
+
+    def chapter_content_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return {'content': ''}
+
+        return {
+            'content': result['Data']['content'],
+            'is_vip': result['Data']['is_vip'],
+            'size': len(result['Data']['content'])
+        }
+
+    
+class xiwenSpider(xiwenProcess,baseSpider):
+    name = name
+
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class xiwenUpdateSpider(xiwenProcess,baseUpdateSpider):
+    name = name + "update"
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+    
+class xiwenFixSpider(xiwenProcess,fixChapterSpider):
+    name = name + 'fix'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class xiwenBookInfoSpider(xiwenProcess,baseUpdateBookStatusSpider):
+    name = name + "bookinfo"
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }

BIN
content_spider/spiders/xiwen/溪阅文学_作品内容输出API文档_1.0.2.pdf