瀏覽代碼

baichuanzw

zhaoyang 2 年之前
父節點
當前提交
362c13aa6c

+ 165 - 0
content_spider/spiders/baichuanzw/baichuanzw.py

@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+
+import time
+from content_spider.baseSpider import baseSpider
+from content_spider.pipelines import formatcontent
+from content_spider.baseSpider import baseUpdateSpider
+from content_spider.baseSpider import fixChapterSpider
+from content_spider.baseSpider import baseUpdateBookStatusSpider
+import json
+from  content_spider.Util import md5
+
+
+# Module-level configuration shared by the spider classes in this file.
+name = 'baichuanzw'
+allowed_domains = ['fenxiao.baichuanzw.com']
+source = 'zy_baichuanzw'
+source_name = '百川'
+source_id = 20
+# Client id sent in every request URL and included in the md5 signature input.
+client_id = '37'
+# NOTE(review): signing secret committed in source — consider loading it from
+# settings or the environment instead.
+key = 'ee09b1cd3fabc8f985dc9cb98e6214ab'
+# URL template: the first placeholder receives the API method name, the second
+# the request signature; the client id is baked in when this module is imported.
+base_url = 'http://fenxiao.baichuanzw.com/api/baichuan/{}?client_id='+client_id+'&sign={}'
+
+# Lookup table searched by get_category(): each entry pairs an upstream
+# ``cate_id``/``cate_name`` with the local ``channel_id``, ``category_id``,
+# ``category_name`` and ``ncategory_id`` values.  The first entry doubles as
+# the fallback for ids that are not listed.
+# NOTE(review): channel_id 1 vs. 2 presumably separates male- and
+# female-oriented channels — confirm against the category spreadsheet.
+category = [
+    {'cate_id': '100001', 'cate_name': '原创', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 23},
+    {'cate_id': '100004', 'cate_name': '玄幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 23},
+    {'cate_id': '100005', 'cate_name': '仙侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 21},
+    {'cate_id': '100006', 'cate_name': '军事', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史','ncategory_id': 51},
+    {'cate_id': '100007', 'cate_name': '历史', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史','ncategory_id': 14},
+    {'cate_id': '100008', 'cate_name': '都市', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 54},
+    {'cate_id': '100015', 'cate_name': '奇幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 22},
+    {'cate_id': '100016', 'cate_name': '游戏', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技','ncategory_id': 19},
+    {'cate_id': '100017', 'cate_name': '竞技', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技','ncategory_id': 19},
+    {'cate_id': '100018', 'cate_name': '灵异', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异','ncategory_id': 81},
+    {'cate_id': '100019', 'cate_name': '科幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 22},
+    {'cate_id': '100021', 'cate_name': '武侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 21},
+    {'cate_id': '100026', 'cate_name': '官场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 55},
+    {'cate_id': '100027', 'cate_name': '商场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 55},
+    {'cate_id': '100043', 'cate_name': '悬疑', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异','ncategory_id': 81},
+    {'cate_id': '100002', 'cate_name': '女频', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
+    {'cate_id': '100010', 'cate_name': '古言', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
+    {'cate_id': '100011', 'cate_name': '现言', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
+    {'cate_id': '100012', 'cate_name': '幻言', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情','ncategory_id': 110},
+    {'cate_id': '100013', 'cate_name': '女生仙侠', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情','ncategory_id': 97},
+    {'cate_id': '100014', 'cate_name': '青春', 'channel_id': 2, 'category_id': 101, 'category_name': '浪漫青春','ncategory_id': 103},
+    {'cate_id': '100020', 'cate_name': '穿越', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
+    {'cate_id': '100022', 'cate_name': '同人', 'channel_id': 2, 'category_id': 102, 'category_name': '耽美同人','ncategory_id': 106},
+    {'cate_id': '100023', 'cate_name': '女生灵异', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异','ncategory_id': 95},
+    {'cate_id': '100024', 'cate_name': '重生', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
+    {'cate_id': '100025', 'cate_name': '婚恋', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
+    {'cate_id': '100031', 'cate_name': '女生悬疑', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异','ncategory_id': 95},
+]
+
+
+def get_category(cate_id):
+    for item in category:
+        if str(item['cate_id']) == cate_id:
+            return item
+    return category[0]
+
+
+class baichuanzw(object):
+    name = name
+    allowed_domains = allowed_domains
+    source = source
+    source_name = source_name
+    source_id = source_id
+
+    def get_start_url(self):
+        sign = md5('{}{}'.format(self.client_id, self.key))
+        return base_url.format('getBookList', sign)
+
+    def bid_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return []
+        result_list = []
+        for item in result['data']:
+            result_list.append({'id': item['id']})
+        return result_list
+
+    def get_book_info_url(self, bid):
+        sign = md5('{}{}{}'.format(self.client_id, self.key, bid))
+        return base_url.format('getBookInfo', sign) + '&book_id={}'.format(bid)
+
+    def book_info_result(self, response):
+        result = json.loads(response.text)
+        result = result['data']
+        category_info = get_category(result['category'])
+        return {
+            'bid': result['id'], 'name': result['name'], 'author': result['author'],
+            'intro': result['brief'], 'cover': result['cover'], 'keyword': result['tag'],
+            'status':  result['complete_status'],
+            'category': category_info['category_name'],'category_id':category_info['ncategory_id'],
+            'channel': category_info['channel_id']
+        }
+
+    def get_chapter_list_url(self, bid):
+        sign = md5('{}{}{}'.format(self.client_id, self.key, bid))
+        return base_url.format('getVolumeList', sign) + '&book_id={}'.format(bid)
+
+    def chapter_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return []
+
+        result_list = []
+        i = 0
+        for volumeList in result['data']:
+            for chapter_item in volumeList['chapterlist']:
+                i = i+1
+                result_list.append({
+                    'source_chapter_id': chapter_item['id'], 'name': chapter_item['name'],
+                    'sequence': i, 'is_vip': 1 if i >= 20 else 0,
+                    'size': 0, 'recent_update_at': ''
+                })
+        return result_list
+
+    def get_chapter_content_url(self, bid, cid):
+        sign = md5('{}{}{}{}'.format(self.client_id, self.key, bid, cid))
+        return base_url.format('Chapterinfo', sign) + '&book_id={}&chapter_id={}'.format(bid, cid)
+
+    def chapter_content_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return {'content': ''}
+
+        return {
+            'content': result['data']['content'],
+            'size': len(result['data']['content'])
+        }
+
+
+class baichuanzwSpider(baichuanzw, baseSpider):
+    name = 'baichuanzw'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class baichuanzwupdateSpider(baichuanzw, baseUpdateSpider):
+    name = 'baichuanzwupdate'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class baichuanzwfixSpider(baichuanzw, fixChapterSpider):
+    name = 'baichuanzwfix'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class baichuanzwBookInfoSpider(baichuanzw, baseUpdateBookStatusSpider):
+    name = 'baichuanzwbookstatusinfo'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.1,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }

二進制
content_spider/spiders/baichuanzw/百川内容输出规范20220518.pdf


二進制
content_spider/spiders/baichuanzw/百川分类ID.xls