# -*- coding: utf-8 -*-
"""Update spiders for the 'zy_kanshu' content source.

Defines two spiders built on the project's base spiders:

* ``BookupdateSpider`` builds the chapter-list / chapter-content URLs and
  converts the JSON responses into plain dicts.
* ``BookupdateStatusSpider`` builds the book-detail URL and converts the
  JSON response into a book-info dict.

NOTE(review): how the base classes schedule requests and consume the
returned dicts is not visible in this file - confirm against
``content_spider.baseSpider``.
"""
import json
import random
import time

from content_spider.baseSpider import baseUpdateBookStatusSpider
from content_spider.baseSpider import baseUpdateSpider


def _load_data(response):
    """Decode the JSON body of ``response`` and return its ``data`` member.

    Returns ``None`` when the body decodes to JSON ``null`` or when the
    ``data`` member is missing or ``null``, so every caller can apply the
    same "no usable payload" guard.

    Raises whatever ``json.loads`` raises on a malformed body, and
    ``AttributeError`` if the top-level JSON value is not an object.
    """
    payload = json.loads(response.text)
    if payload is None:
        return None
    return payload.get('data')


class BookupdateSpider(baseUpdateSpider):
    """Chapter list / chapter content spider for the 'zy_kanshu' source."""

    name = 'kanshuupdate'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the API endpoint name.
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'

    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # Log file name carries the date on which the class was defined
        # (evaluated once, at import time).
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_chapter_list_url(self, bid):
        """Return the chapter-list URL for book ``bid``.

        A random integer in [1, 10000] is appended as the ``random`` query
        parameter - presumably to defeat caching; verify with the API owner.
        """
        endpoint = self.base_url.format('ChapterLists')
        query = '&bookid={}&random={}'.format(bid, random.randint(1, 10000))
        return endpoint + query

    def chapter_list_result(self, response):
        """Convert a chapter-list response into a list of chapter dicts.

        Returns an empty list when the response carries no ``data``.
        ``sequence`` is the 1-based position of the chapter in the response;
        ``size`` is always 0 here.
        """
        chapters = _load_data(response)
        if chapters is None:
            return []

        return [
            {
                'source_chapter_id': chapter['id'],
                'name': chapter['title'],
                'sequence': sequence,
                'is_vip': 1 if chapter['isVip'] else 0,
                'size': 0,
                'recent_update_at': chapter['lastUpdateTime']
            }
            for sequence, chapter in enumerate(chapters, start=1)
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the chapter-content URL for chapter ``cid`` of book ``bid``."""
        endpoint = self.base_url.format('ChapterContent')
        return endpoint + '&bookid={}&chapterid={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Convert a chapter-content response into a content dict.

        Returns ``{'content': ''}`` when the response carries no ``data``
        (the same guard the other result parsers use), otherwise the
        chapter text together with its length in characters.
        """
        data = _load_data(response)
        if data is None:
            return {'content': ''}

        content = data['content']
        return {
            'content': content,
            'size': len(content)
        }


class BookupdateStatusSpider(baseUpdateBookStatusSpider):
    """Book detail / status spider for the 'zy_kanshu' source."""

    name = 'kanshuupdatestatus'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the API endpoint name.
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'

    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # Log file name carries the date on which the class was defined
        # (evaluated once, at import time).
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_book_info_url(self, bid):
        """Return the book-detail URL for book ``bid``."""
        return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Convert a book-detail response into a book-info dict.

        Returns ``None`` when the response carries no ``data``.
        ``category_id`` is always 1; every other field is copied from the
        response payload.
        """
        book = _load_data(response)
        if book is None:
            return None

        return {
            'bid': book['id'],
            'name': book['bookTitle'],
            'author': book['author'],
            'intro': book['introduction'],
            'cover': book['cover'],
            'keyword': book['labels'],
            'status': book['state'],
            'category': book['category'],
            'category_id': 1,
            'channel': book['channelId']
        }