123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- # -*- coding: utf-8 -*-
- import random
- from content_spider.baseSpider import baseUpdateSpider
- from content_spider.baseSpider import baseUpdateBookStatusSpider
- import json
- import time
class BookupdateSpider(baseUpdateSpider):
    """Chapter-update spider for the 'zy_kanshu' source.

    Provides the URL builders and JSON response parsers for chapter lists
    and chapter content served under ``base_url``. When and how these
    methods are called is decided by ``baseUpdateSpider``, which is not
    defined in this file.
    """

    name = 'kanshuupdate'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the endpoint name
    # (e.g. 'ChapterLists', 'ChapterContent').
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # The date suffix is computed once, when the class body executes.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_chapter_list_url(self, bid):
        """Return the chapter-list URL for book ``bid``.

        A random integer in [1, 10000] is appended as the ``random`` query
        parameter (presumably to defeat caching -- confirm with the API owner).
        """
        return self.base_url.format('ChapterLists') + '&bookid={}&random={}'.format(bid, random.randint(1, 10000))

    def chapter_list_result(self, response):
        """Parse a chapter-list response into a list of chapter dicts.

        Returns an empty list when the body decodes to ``null`` or has no
        ``data``. Otherwise returns one dict per entry of ``data``, in
        response order, with ``sequence`` numbered from 1, ``is_vip``
        normalised to 1/0 and ``size`` always 0.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [
            {
                'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
                'sequence': sequence, 'is_vip': 1 if chapter_item['isVip'] else 0,
                'size': 0, 'recent_update_at': chapter_item['lastUpdateTime']
            }
            for sequence, chapter_item in enumerate(result['data'], 1)
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the chapter-content URL for chapter ``cid`` of book ``bid``."""
        return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Parse a chapter-content response.

        Returns ``{'content': ''}`` when the body decodes to ``null`` or has
        no ``data``; otherwise returns the content text together with its
        length under ``size``.
        """
        result = json.loads(response.text)
        # Apply the same missing-data guard as chapter_list_result, so a
        # reply without ``data`` yields the empty result instead of raising.
        if result is None or result.get('data') is None:
            return {'content': ''}
        content = result['data']['content']
        return {
            'content': content,
            'size': len(content)
        }
-
class BookupdateStatusSpider(baseUpdateBookStatusSpider):
    """Book-status spider for the 'zy_kanshu' source.

    Provides the book-detail URL builder and the parser that maps the
    detail response onto the project's book-info dict. When and how these
    methods are called is decided by ``baseUpdateBookStatusSpider``, which
    is not defined in this file.
    """

    name = 'kanshuupdatestatus'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the endpoint name ('BookDetail').
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # The date suffix is computed once, when the class body executes.
        'LOG_FILE': (
            'content_spider/log/'
            + name
            + time.strftime("%Y-%m-%d", time.localtime())
            + '.log'
        ),
    }

    def get_book_info_url(self, bid):
        """Return the book-detail URL for book ``bid``."""
        endpoint = self.base_url.format('BookDetail')
        return endpoint + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Parse a book-detail response into a book-info dict.

        Returns ``None`` when the body decodes to ``null`` or has no
        ``data``. ``category_id`` is always 1; every other value is copied
        from the response's ``data`` object.
        """
        payload = json.loads(response.text)
        book = payload.get('data') if payload is not None else None
        if book is None:
            return None

        # (output key, response key) pairs, in output order up to 'category'.
        field_map = (
            ('bid', 'id'),
            ('name', 'bookTitle'),
            ('author', 'author'),
            ('intro', 'introduction'),
            ('cover', 'cover'),
            ('keyword', 'labels'),
            ('status', 'state'),
            ('category', 'category'),
        )
        info = {out_key: book[src_key] for out_key, src_key in field_map}
        info['category_id'] = 1
        info['channel'] = book['channelId']
        return info
|