# -*- coding: utf-8 -*-
from content_spider.baseSpider import fixChapterSpider
import json
import time


class bookFixSpider(fixChapterSpider):
    """Chapter-fix spider for the Kanshu (看书) partner API on hezuo.lunjian.com."""

    name = 'kanshufix'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_chapter_list_url(self, bid):
        # Chapter-list endpoint for a given book id.
        return self.base_url.format('ChapterLists') + '&bookid={}'.format(bid)

    def chapter_list_result(self, response):
        # Normalize the API's chapter list into the fields the pipeline expects.
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        result_list = []
        i = 0
        for chapter_item in result['data']:
            i = i + 1
            result_list.append({
                'source_chapter_id': chapter_item['id'],
                'name': chapter_item['title'],
                'sequence': i,
                'is_vip': 1 if chapter_item['isVip'] else 0,
                'size': 0,
                'recent_update_at': chapter_item['lastUpdateTime']
            })
        return result_list

    def get_chapter_content_url(self, bid, cid):
        # Chapter-content endpoint for a given book id and chapter id.
        return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)

    def chapter_content_result(self, response):
        result = json.loads(response.text)
        # Guard against a missing 'data' payload as well, mirroring chapter_list_result,
        # so a malformed response returns empty content instead of raising.
        if result is None or result.get('data') is None:
            return {'content': ''}
        return {
            'content': result['data']['content'],
            'size': len(result['data']['content'])
        }