|
@@ -0,0 +1,165 @@
|
|
|
# -*- coding: utf-8 -*-

import json
import time

from content_spider.Util import md5
from content_spider.baseSpider import baseSpider
from content_spider.baseSpider import baseUpdateBookStatusSpider
from content_spider.baseSpider import baseUpdateSpider
from content_spider.baseSpider import fixChapterSpider
from content_spider.pipelines import formatcontent
|
|
|
+
|
|
|
+
|
|
|
# Identity of this content source; the adapter class in this module re-exports
# these values as class attributes.
name = 'baichuanzw'
allowed_domains = ['fenxiao.baichuanzw.com']
source = 'zy_baichuanzw'
source_name = '百川'
source_id = 20

# Credentials for the distribution API: `client_id` is sent in every request
# URL, `key` is mixed into the md5 request signature.
# NOTE(review): `key` is a signing secret committed to source control --
# consider loading it from settings or the environment instead.
client_id = '37'
key = 'ee09b1cd3fabc8f985dc9cb98e6214ab'

# URL template with two positional placeholders, filled via str.format:
# first the API method name, then the request signature.
base_url = 'http://fenxiao.baichuanzw.com/api/baichuan/{}?client_id=' + client_id + '&sign={}'
|
|
|
+
|
|
|
# Mapping from the source's category ids to the local taxonomy.
# Each entry carries the source id/name (`cate_id`, `cate_name`) together with
# the local `channel_id`, `category_id`, `category_name` and `ncategory_id`.
# The first entry doubles as the fallback returned for unknown ids.
category = [
    {'cate_id': '100001', 'cate_name': '原创', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 23},
    {'cate_id': '100004', 'cate_name': '玄幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 23},
    {'cate_id': '100005', 'cate_name': '仙侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 21},
    {'cate_id': '100006', 'cate_name': '军事', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史', 'ncategory_id': 51},
    {'cate_id': '100007', 'cate_name': '历史', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史', 'ncategory_id': 14},
    {'cate_id': '100008', 'cate_name': '都市', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 54},
    {'cate_id': '100015', 'cate_name': '奇幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 22},
    {'cate_id': '100016', 'cate_name': '游戏', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技', 'ncategory_id': 19},
    {'cate_id': '100017', 'cate_name': '竞技', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技', 'ncategory_id': 19},
    {'cate_id': '100018', 'cate_name': '灵异', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异', 'ncategory_id': 81},
    {'cate_id': '100019', 'cate_name': '科幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 22},
    {'cate_id': '100021', 'cate_name': '武侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 21},
    {'cate_id': '100026', 'cate_name': '官场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 55},
    {'cate_id': '100027', 'cate_name': '商场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 55},
    {'cate_id': '100043', 'cate_name': '悬疑', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异', 'ncategory_id': 81},
    {'cate_id': '100002', 'cate_name': '女频', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100010', 'cate_name': '古言', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100011', 'cate_name': '现言', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100012', 'cate_name': '幻言', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情', 'ncategory_id': 110},
    {'cate_id': '100013', 'cate_name': '女生仙侠', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情', 'ncategory_id': 97},
    {'cate_id': '100014', 'cate_name': '青春', 'channel_id': 2, 'category_id': 101, 'category_name': '浪漫青春', 'ncategory_id': 103},
    {'cate_id': '100020', 'cate_name': '穿越', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100022', 'cate_name': '同人', 'channel_id': 2, 'category_id': 102, 'category_name': '耽美同人', 'ncategory_id': 106},
    {'cate_id': '100023', 'cate_name': '女生灵异', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异', 'ncategory_id': 95},
    {'cate_id': '100024', 'cate_name': '重生', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100025', 'cate_name': '婚恋', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100031', 'cate_name': '女生悬疑', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异', 'ncategory_id': 95},
]


def get_category(cate_id):
    """Return the ``category`` entry for a source category id.

    :param cate_id: category id as delivered by the source API. It is
        normalised with ``str()`` so that ``100005`` and ``'100005'`` resolve
        to the same entry.
    :return: the matching dict from ``category``; ``category[0]`` when the id
        is not present in the table.
    """
    # Normalise once: decoded JSON may carry the id as an int, while the table
    # stores strings.
    wanted = str(cate_id)
    for item in category:
        if str(item['cate_id']) == wanted:
            return item
    # Unknown id: fall back to the first table entry.
    return category[0]
|
|
|
+
|
|
|
+
|
|
|
class baichuanzw(object):
    """Adapter for the baichuanzw distribution API.

    Builds the signed request URLs and converts the JSON responses into plain
    dicts and lists. Every request signature is the md5 of ``client_id``,
    ``key`` and the request's own parameters concatenated in that order.
    """

    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id
    # The URL builders read the credentials through ``self``; expose the
    # module-level values as class attributes so those lookups resolve.
    client_id = client_id
    key = key

    def _sign(self, *parts):
        """Return the md5 signature of client_id + key + the given parts."""
        payload = ''.join(
            '{}'.format(part) for part in (self.client_id, self.key) + parts
        )
        return md5(payload)

    def get_start_url(self):
        """Return the signed URL of the ``getBookList`` endpoint."""
        return base_url.format('getBookList', self._sign())

    def bid_list_result(self, response):
        """Extract the book ids from a ``getBookList`` response.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: list of ``{'id': ...}`` dicts; empty when the payload is
            ``null`` or carries no ``data``.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [{'id': item['id']} for item in result['data']]

    def get_book_info_url(self, bid):
        """Return the signed ``getBookInfo`` URL for book ``bid``."""
        return base_url.format('getBookInfo', self._sign(bid)) + '&book_id={}'.format(bid)

    def book_info_result(self, response):
        """Convert a ``getBookInfo`` response into the book-info dict.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: dict with the keys ``bid``, ``name``, ``author``, ``intro``,
            ``cover``, ``keyword``, ``status``, ``category``, ``category_id``
            and ``channel``.
        :raises KeyError: when ``data`` or one of the expected fields is
            missing from the payload.
        """
        book = json.loads(response.text)['data']
        # Translate the source's category id into the local taxonomy.
        category_info = get_category(book['category'])
        return {
            'bid': book['id'],
            'name': book['name'],
            'author': book['author'],
            'intro': book['brief'],
            'cover': book['cover'],
            'keyword': book['tag'],
            'status': book['complete_status'],
            'category': category_info['category_name'],
            'category_id': category_info['ncategory_id'],
            'channel': category_info['channel_id'],
        }

    def get_chapter_list_url(self, bid):
        """Return the signed ``getVolumeList`` URL for book ``bid``."""
        return base_url.format('getVolumeList', self._sign(bid)) + '&book_id={}'.format(bid)

    def chapter_list_result(self, response):
        """Flatten a ``getVolumeList`` response into one chapter list.

        Chapters are numbered consecutively across all volumes, starting at 1.
        Chapters with a sequence of 20 or higher are flagged ``is_vip = 1``.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: list of chapter dicts; empty when the payload is ``null`` or
            carries no ``data``.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []

        chapters = []
        for volume in result['data']:
            # A volume without chapters (missing or null list) contributes
            # nothing instead of aborting the whole book.
            for chapter_item in volume.get('chapterlist') or []:
                sequence = len(chapters) + 1
                chapters.append({
                    'source_chapter_id': chapter_item['id'],
                    'name': chapter_item['name'],
                    'sequence': sequence,
                    'is_vip': 1 if sequence >= 20 else 0,
                    'size': 0,
                    'recent_update_at': '',
                })
        return chapters

    def get_chapter_content_url(self, bid, cid):
        """Return the signed ``Chapterinfo`` URL for chapter ``cid`` of ``bid``."""
        return (
            base_url.format('Chapterinfo', self._sign(bid, cid))
            + '&book_id={}&chapter_id={}'.format(bid, cid)
        )

    def chapter_content_result(self, response):
        """Extract the chapter text from a ``Chapterinfo`` response.

        :param response: object whose ``text`` attribute holds the JSON body.
        :return: ``{'content': ..., 'size': len(content)}``; ``{'content': ''}``
            when the payload is ``null`` or carries no ``data``.
        """
        result = json.loads(response.text)
        # Same guard as the list parsers: a response without ``data`` yields
        # the empty result instead of a TypeError.
        if result is None or result.get('data') is None:
            return {'content': ''}

        content = result['data']['content']
        return {
            'content': content,
            'size': len(content),
        }
|
|
|
+
|
|
|
+
|
|
|
class baichuanzwSpider(baichuanzw, baseSpider):
    """Spider ``baichuanzw``: the baichuanzw API adapter on top of ``baseSpider``."""

    name = 'baichuanzw'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        # One log file per spider name and local date; the date is fixed when
        # this class body is evaluated. strftime() without a time argument
        # uses the current local time.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime('%Y-%m-%d') + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
class baichuanzwupdateSpider(baichuanzw, baseUpdateSpider):
    """Spider ``baichuanzwupdate``: the baichuanzw API adapter on top of ``baseUpdateSpider``."""

    name = 'baichuanzwupdate'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        # One log file per spider name and local date; the date is fixed when
        # this class body is evaluated. strftime() without a time argument
        # uses the current local time.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime('%Y-%m-%d') + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
class baichuanzwfixSpider(baichuanzw, fixChapterSpider):
    """Spider ``baichuanzwfix``: the baichuanzw API adapter on top of ``fixChapterSpider``."""

    name = 'baichuanzwfix'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        # One log file per spider name and local date; the date is fixed when
        # this class body is evaluated. strftime() without a time argument
        # uses the current local time.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime('%Y-%m-%d') + '.log',
    }
|
|
|
+
|
|
|
+
|
|
|
class baichuanzwBookInfoSpider(baichuanzw, baseUpdateBookStatusSpider):
    """Spider ``baichuanzwbookstatusinfo``: the baichuanzw API adapter on top of ``baseUpdateBookStatusSpider``."""

    name = 'baichuanzwbookstatusinfo'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        # One log file per spider name and local date; the date is fixed when
        # this class body is evaluated. strftime() without a time argument
        # uses the current local time.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime('%Y-%m-%d') + '.log',
    }
|