# -*- coding: utf-8 -*-
"""Spiders for the judian book source (www.judianbook.com JSON API).

``judianProcess`` holds the URL builders and JSON response parsers; the four
spider classes at the bottom combine it with the base spiders imported from
``content_spider.baseSpider``.
"""
import json
import time

from content_spider.baseSpider import baseSpider
from content_spider.baseSpider import baseUpdateSpider
from content_spider.baseSpider import fixChapterSpider
from content_spider.baseSpider import baseUpdateBookStatusSpider
from content_spider.Util import md5

name = 'judian'
allowed_domains = ['www.judianbook.com']
source = 'zy_judian'
source_name = 'judian据点'
source_id = 30

# The ``{}`` placeholder receives the API endpoint name (e.g. 'BookList').
# NOTE(review): the API token is hard-coded in source control; consider
# loading it from configuration instead.
base_url = 'http://www.judianbook.com/index/api/{}?token=d5957e72f7711fd001ea29ddf0cff977e58c6342'

# Maps the remote ``type`` id of a book to the local channel/category.
# The first entry doubles as the fallback used by ``get_category``.
# NOTE(review): the ``name`` values '校 园言情' and ' 婚姻' contain stray
# spaces; they are not read by this module, so they are left untouched here
# -- confirm against the upstream site before normalizing.
category = [
    {'type': 9, 'name': '武侠', 'channel_id': 1, 'category_name': '武侠仙侠', 'category_id': 21},
    {'type': 10, 'name': '仙侠', 'channel_id': 1, 'category_name': '武侠仙侠', 'category_id': 21},
    {'type': 11, 'name': '都市', 'channel_id': 1, 'category_name': '都市爱情', 'category_id': 54},
    {'type': 13, 'name': '历史', 'channel_id': 1, 'category_name': '特种军旅', 'category_id': 51},
    {'type': 14, 'name': '游戏', 'channel_id': 1, 'category_name': '游戏竞技', 'category_id': 19},
    {'type': 21, 'name': '玄幻', 'channel_id': 1, 'category_name': '玄幻奇幻', 'category_id': 23},
    {'type': 38, 'name': '科幻', 'channel_id': 1, 'category_name': '玄幻奇幻', 'category_id': 23},
    {'type': 44, 'name': '同人', 'channel_id': 1, 'category_name': '衍生同人', 'category_id': 125},
    {'type': 49, 'name': '短篇男', 'channel_id': 1, 'category_name': '其他作品', 'category_id': 127},
    {'type': 2, 'name': '古代言情', 'channel_id': 2, 'category_name': '穿越重生', 'category_id': 83},
    {'type': 3, 'name': '现代言情', 'channel_id': 2, 'category_name': '婚恋情感', 'category_id': 98},
    {'type': 4, 'name': '校 园言情', 'channel_id': 2, 'category_name': '青春校园', 'category_id': 104},
    {'type': 5, 'name': '幻想言情', 'channel_id': 2, 'category_name': '东方玄幻', 'category_id': 96},
    {'type': 22, 'name': '穿越重生', 'channel_id': 2, 'category_name': '穿越重生', 'category_id': 83},
    {'type': 45, 'name': '耽美', 'channel_id': 2, 'category_name': '耽美同人', 'category_id': 106},
    {'type': 46, 'name': '百合', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 47, 'name': '无cp', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 48, 'name': '短篇', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    # category_name normalized from '灵异恐 怖' so that every category_id 81
    # entry reports the same name.
    {'type': 20, 'name': '推理悬念', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
    {'type': 40, 'name': '恐怖惊悚', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
    {'type': 41, 'name': '灵异鬼怪', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
    {'type': 42, 'name': '风水秘术', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
    {'type': 43, 'name': '探险异闻', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
    {'type': 24, 'name': '励志', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 25, 'name': '职场', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 26, 'name': '生活', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 28, 'name': ' 婚姻', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 29, 'name': '教育', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 35, 'name': '心理学', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
    {'type': 36, 'name': '管理', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
]


def get_category(type):  # noqa: A002 - parameter name kept for existing callers
    """Return the ``category`` entry whose ``'type'`` equals ``int(type)``.

    Falls back to ``category[0]`` when no entry matches, or when ``type``
    cannot be converted to an integer (e.g. ``None`` or a non-numeric string).
    """
    try:
        type_id = int(type)
    except (TypeError, ValueError):
        return category[0]
    for item in category:
        if item['type'] == type_id:
            return item
    return category[0]


def _build_custom_settings(spider_name):
    """Return a fresh per-spider settings dict.

    The log file name embeds ``spider_name`` and the local date at the moment
    of the call; the spider classes call this from their class body, i.e.
    once at import time.
    """
    log_date = time.strftime("%Y-%m-%d", time.localtime())
    return {
        'DOWNLOAD_DELAY': 0.1,
        'SOURCE': source,
        'LOG_FILE': 'content_spider/log/' + spider_name + log_date + '.log',
    }


class judianProcess(object):
    """URL builders and JSON response parsers for the judian API."""

    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id

    def get_start_url(self):
        """Return the URL of the ``BookList`` endpoint."""
        return base_url.format('BookList') + '&site=500'

    def bid_list_result(self, response):
        """Parse the book-list response into ``[{'id': bookid}, ...]``.

        Returns an empty list when the decoded JSON is ``None`` or has no
        ``'data'`` value.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [{'id': item['bookid']} for item in result['data']]

    def get_book_info_url(self, bid):
        """Return the ``BookInfo`` URL for book ``bid``."""
        return base_url.format('BookInfo') + '&book_id={}'.format(bid)

    def book_info_result(self, response):
        """Parse the book-info response into a flat dict of book attributes.

        The remote ``type`` is translated to a local category/channel via
        ``get_category``. Raises ``KeyError`` if an expected field is missing.
        """
        result = json.loads(response.text)
        result = result['data']
        category_info = get_category(result['type'])
        return {
            'bid': result['book_id'],
            'name': result['book_name'],
            'author': result['author'],
            'intro': result['describe'],
            'cover': result['cover'],
            'keyword': result['tag'],
            'status': result['state'],
            'category': category_info['category_name'],
            'category_id': category_info['category_id'],
            'channel': category_info['channel_id'],
        }

    def get_chapter_list_url(self, bid):
        """Return the ``BookChapters`` URL for book ``bid`` (size=3000)."""
        return base_url.format('BookChapters') + '&book_id={}&size=3000'.format(bid)

    def chapter_list_result(self, response):
        """Parse the chapter-list response into a list of chapter dicts.

        ``chapnum`` is used both as the source chapter id and as the
        sequence. Returns an empty list when the decoded JSON is ``None`` or
        has no ``'data'`` value.
        """
        result = json.loads(response.text)
        if result is None or result.get('data') is None:
            return []
        return [
            {
                'source_chapter_id': chapter_item['chapnum'],
                'name': chapter_item['name'],
                'sequence': chapter_item['chapnum'],
                'is_vip': chapter_item['is_pay'],
                'size': chapter_item['word_count'],
                'recent_update_at': '',
            }
            for chapter_item in result['data']
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the ``BookChapterInfo`` URL for chapter ``cid`` of ``bid``."""
        return base_url.format('BookChapterInfo') + '&book_id={}&chapnum={}&type=json'.format(bid, cid)

    def chapter_content_result(self, response):
        """Parse the chapter-content response into ``{'content': text}``.

        Returns empty content when the decoded JSON is ``None``.
        """
        result = json.loads(response.text)
        if result is None:
            return {'content': ''}
        # NOTE(review): unlike the other endpoints this reads 'content' at the
        # top level rather than under 'data', and assumes it is a list of
        # strings (joining a plain str would interleave "\r\n" between
        # characters) -- confirm against the API.
        return {'content': "\r\n".join(result['content'])}


class judianSpider(judianProcess, baseSpider):
    """judian spider built on ``baseSpider``."""

    name = name
    custom_settings = _build_custom_settings(name)


class judianUpdateSpider(judianProcess, baseUpdateSpider):
    """judian spider built on ``baseUpdateSpider``."""

    name = name + "update"
    # ``name`` here is the class-level value assigned on the line above.
    custom_settings = _build_custom_settings(name)


class judianFixSpider(judianProcess, fixChapterSpider):
    """judian spider built on ``fixChapterSpider``."""

    name = name + 'fix'
    # ``name`` here is the class-level value assigned on the line above.
    custom_settings = _build_custom_settings(name)


class judianBookInfoSpider(judianProcess, baseUpdateBookStatusSpider):
    """judian spider built on ``baseUpdateBookStatusSpider``."""

    name = name + "bookinfo"
    # ``name`` here is the class-level value assigned on the line above.
    custom_settings = _build_custom_settings(name)