# -*- coding: utf-8 -*-
"""Spiders for the baichuanzw distribution API (fenxiao.baichuanzw.com).

The ``baichuanzw`` mixin builds the signed API URLs and converts the JSON
responses into plain dicts/lists. The four spider classes at the bottom of
the module combine that mixin with the project's base spiders.
"""
import json
import time

from content_spider.baseSpider import baseSpider
from content_spider.pipelines import formatcontent
from content_spider.baseSpider import baseUpdateSpider
from content_spider.baseSpider import fixChapterSpider
from content_spider.baseSpider import baseUpdateBookStatusSpider
from content_spider.Util import md5

name = 'baichuanzw'
allowed_domains = ['fenxiao.baichuanzw.com']
source = 'zy_baichuan'
source_name = 'baichuan百川'
source_id = 27
client_id = '37'
# NOTE(review): API secret is committed in source; consider loading it from
# configuration instead.
key = 'ee09b1cd3fabc8f985dc9cb98e6214ab'
# First placeholder: API method name; second placeholder: request signature.
base_url = 'http://fenxiao.baichuanzw.com/api/baichuan/{}?client_id=' + client_id + '&sign={}'

# Chapters whose 1-based sequence number reaches this value are flagged VIP.
_FIRST_VIP_SEQUENCE = 20

# Maps the remote category ids ('cate_id') onto the local channel/category
# identifiers. The first entry doubles as the fallback in get_category().
category = [
    {'cate_id': '100001', 'cate_name': '原创', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 23},
    {'cate_id': '100004', 'cate_name': '玄幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 23},
    {'cate_id': '100005', 'cate_name': '仙侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 21},
    {'cate_id': '100006', 'cate_name': '军事', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史', 'ncategory_id': 51},
    {'cate_id': '100007', 'cate_name': '历史', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史', 'ncategory_id': 14},
    {'cate_id': '100008', 'cate_name': '都市', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 54},
    {'cate_id': '100015', 'cate_name': '奇幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 22},
    {'cate_id': '100016', 'cate_name': '游戏', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技', 'ncategory_id': 19},
    {'cate_id': '100017', 'cate_name': '竞技', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技', 'ncategory_id': 19},
    {'cate_id': '100018', 'cate_name': '灵异', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异', 'ncategory_id': 81},
    {'cate_id': '100019', 'cate_name': '科幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 22},
    {'cate_id': '100021', 'cate_name': '武侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠', 'ncategory_id': 21},
    {'cate_id': '100026', 'cate_name': '官场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 55},
    {'cate_id': '100027', 'cate_name': '商场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧', 'ncategory_id': 55},
    {'cate_id': '100043', 'cate_name': '悬疑', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异', 'ncategory_id': 81},
    {'cate_id': '100002', 'cate_name': '女频', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100010', 'cate_name': '古言', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100011', 'cate_name': '现言', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100012', 'cate_name': '幻言', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情', 'ncategory_id': 110},
    {'cate_id': '100013', 'cate_name': '女生仙侠', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情', 'ncategory_id': 97},
    {'cate_id': '100014', 'cate_name': '青春', 'channel_id': 2, 'category_id': 101, 'category_name': '浪漫青春', 'ncategory_id': 103},
    {'cate_id': '100020', 'cate_name': '穿越', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100022', 'cate_name': '同人', 'channel_id': 2, 'category_id': 102, 'category_name': '耽美同人', 'ncategory_id': 106},
    {'cate_id': '100023', 'cate_name': '女生灵异', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异', 'ncategory_id': 95},
    {'cate_id': '100024', 'cate_name': '重生', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情', 'ncategory_id': 83},
    {'cate_id': '100025', 'cate_name': '婚恋', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情', 'ncategory_id': 98},
    {'cate_id': '100031', 'cate_name': '女生悬疑', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异', 'ncategory_id': 95},
]


def get_category(cate_id):
    """Return the ``category`` entry whose 'cate_id' matches *cate_id*.

    Both sides are compared as strings, so an integer id (e.g. decoded from
    JSON) matches the string ids stored in the table. When nothing matches,
    the first table entry is returned as the default.
    """
    wanted = str(cate_id)
    for item in category:
        if str(item['cate_id']) == wanted:
            return item
    return category[0]


def _spider_settings(spider_name):
    """Build the per-spider settings dict shared by all spiders in this module.

    The log file name is the spider name followed by the local date
    (YYYY-MM-DD) at the time this function is called.
    """
    return {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + spider_name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
    }


class baichuanzw(object):
    """URL builders and JSON response parsers for the baichuanzw API.

    Intended to be mixed into one of the project's base spider classes,
    which presumably call these hooks (their contract is defined outside
    this module -- verify against content_spider.baseSpider).
    """

    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id

    def get_start_url(self):
        """Return the signed 'getBookList' URL."""
        # The signature is md5() of client_id and key concatenated.
        sign = md5('{}{}'.format(client_id, key))
        return base_url.format('getBookList', sign)

    def bid_list_result(self, response):
        """Extract book ids from a JSON response.

        Returns a list of ``{'id': ...}`` dicts, one per element of the
        payload's 'data' list, or an empty list when the payload or its
        'data' field is null.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [{'id': item['id']} for item in result['data']]

    def get_book_info_url(self, bid):
        """Return the signed 'getBookInfo' URL for book *bid*."""
        sign = md5('{}{}{}'.format(client_id, key, bid))
        return base_url.format('getBookInfo', sign) + '&book_id={}'.format(bid)

    def book_info_result(self, response):
        """Convert a book-info JSON response into a flat book dict.

        The remote 'category' value is translated through get_category().
        Raises KeyError/TypeError when the payload has no usable 'data'
        object or lacks one of the expected fields.
        """
        book = json.loads(response.text)['data']
        category_info = get_category(book['category'])
        return {
            'bid': book['id'],
            'name': book['name'],
            'author': book['author'],
            'intro': book['brief'],
            'cover': book['cover'],
            'keyword': book['tag'],
            'status': book['complete_status'],
            'category': category_info['category_name'],
            'category_id': category_info['ncategory_id'],
            'channel': category_info['channel_id'],
        }

    def get_chapter_list_url(self, bid):
        """Return the signed 'getVolumeList' URL for book *bid*."""
        sign = md5('{}{}{}'.format(client_id, key, bid))
        return base_url.format('getVolumeList', sign) + '&book_id={}'.format(bid)

    def chapter_list_result(self, response):
        """Flatten the volumes of a JSON response into one chapter list.

        Chapters are numbered from 1 in the order they appear across all
        volumes. 'is_vip' is 1 from sequence ``_FIRST_VIP_SEQUENCE`` onwards
        and 0 before it. Returns an empty list when the payload or its
        'data' field is null.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        chapters = (
            chapter
            for volume in result['data']
            for chapter in volume['chapterlist']
        )
        return [
            {
                'source_chapter_id': chapter['id'],
                'name': chapter['name'],
                'sequence': sequence,
                'is_vip': 1 if sequence >= _FIRST_VIP_SEQUENCE else 0,
                'size': 0,
                'recent_update_at': '',
            }
            for sequence, chapter in enumerate(chapters, 1)
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the signed 'Chapterinfo' URL for chapter *cid* of book *bid*."""
        sign = md5('{}{}{}{}'.format(client_id, key, bid, cid))
        return base_url.format('Chapterinfo', sign) + '&book_id={}&chapter_id={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Extract the chapter text from a JSON response.

        Returns ``{'content': text, 'size': len(text)}``. When the payload,
        its 'data' object or the 'content' field is missing or null, returns
        ``{'content': ''}`` instead of raising.
        """
        result = json.loads(response.text)
        data = result.get('data') if isinstance(result, dict) else None
        content = data.get('content') if isinstance(data, dict) else None
        if content is None:
            return {'content': ''}
        return {
            'content': content,
            'size': len(content),
        }


class baichuanzwSpider(baichuanzw, baseSpider):
    """baichuanzw mixin combined with baseSpider."""

    name = 'baichuanzw'
    # ``name`` here resolves to the class-level value assigned just above.
    custom_settings = _spider_settings(name)


class baichuanzwupdateSpider(baichuanzw, baseUpdateSpider):
    """baichuanzw mixin combined with baseUpdateSpider."""

    name = 'baichuanzwupdate'
    custom_settings = _spider_settings(name)


class baichuanzwfixSpider(baichuanzw, fixChapterSpider):
    """baichuanzw mixin combined with fixChapterSpider."""

    name = 'baichuanzwfix'
    custom_settings = _spider_settings(name)


class baichuanzwBookInfoSpider(baichuanzw, baseUpdateBookStatusSpider):
    """baichuanzw mixin combined with baseUpdateBookStatusSpider."""

    name = 'baichuanzwbookstatusinfo'
    custom_settings = _spider_settings(name)