123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- # -*- coding: utf-8 -*-
- from content_spider.baseSpider import baseSpider
- from content_spider.baseSpider import fixBookInfoSpider
- from content_spider.Util import get_category_by_name
- import json
- import time
# Static table of sub-category rows. Each row carries its own ``id`` and
# ``name`` plus the ``channel_id`` / ``category_id`` / ``category_name`` it is
# grouped under.
# NOTE(review): nothing in this file reads ``category`` (the spiders resolve
# categories through ``get_category_by_name``); presumably it is imported
# elsewhere or kept for reference -- confirm before removing.
# Labels must not contain whitespace: rows sharing a ``category_id`` are
# expected to carry an identical ``category_name``.
category = [
    {'id': 19, 'name': '斗气升级', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 20, 'name': '元素魔法', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 21, 'name': '血族僵尸', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 22, 'name': '东方玄幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 23, 'name': '西方奇幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 24, 'name': '异世大陆', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 101, 'name': '异界征战', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
    {'id': 26, 'name': '传统武侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 27, 'name': '古典仙侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 29, 'name': '现代修真', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 30, 'name': '洪荒封神', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
    {'id': 14, 'name': '异术超能', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 15, 'name': '都市生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 16, 'name': '黑白两道', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 17, 'name': '官场商战', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 18, 'name': '青春校园', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
    {'id': 31, 'name': '架空历史', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
    {'id': 32, 'name': '军旅战争', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
    {'id': 102, 'name': '抗战烽火', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
    {'id': 5, 'name': '侦探推理', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
    {'id': 38, 'name': '灵异惊悚', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
    {'id': 103, 'name': '未来世界', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
    {'id': 104, 'name': '古武机甲', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
    {'id': 105, 'name': '星际时空', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
    {'id': 25, 'name': '游戏异界', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 34, 'name': '虚拟网游', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 35, 'name': '电子竞技', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 36, 'name': '体育竞技', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
    {'id': 1000, 'name': '名人传记', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 1002, 'name': '经典名著', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 1004, 'name': '传统文化', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 1006, 'name': '人际社交', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2001, 'name': '科幻未来', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2002, 'name': '衍生同人', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2003, 'name': '古风穿越', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2004, 'name': '魔幻奇幻', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2005, 'name': '游戏竞技', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2006, 'name': '悬疑烧脑', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2007, 'name': '都市幻想', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2008, 'name': '神秘灵异', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2009, 'name': '青春校园', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2010, 'name': '武侠仙侠', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 2020, 'name': '历史军事', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
    {'id': 48, 'name': '总裁豪门', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 49, 'name': '职场白领', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 50, 'name': '浪漫言情', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 51, 'name': '婚姻家庭', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 53, 'name': '情感纪实', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 81, 'name': '军婚高干', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
    {'id': 55, 'name': '花季雨季', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 56, 'name': '成长励志', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 57, 'name': '青春伤痛', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 58, 'name': '校园生活', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
    {'id': 59, 'name': '女尊天下', 'channel_id': 2, 'category_id': 123, 'category_name': '女尊王朝'},
    {'id': 60, 'name': '宫闱情仇', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
    {'id': 61, 'name': '异国浪漫', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 62, 'name': '宅门世家', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
    {'id': 80, 'name': '穿越言情', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 63, 'name': '仙侣情缘', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 64, 'name': '妖精幻情', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 65, 'name': '奇幻柔情', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 66, 'name': '魔法异能', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 67, 'name': '重生爱恋', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 68, 'name': '反穿时空', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 69, 'name': '古代王朝', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 70, 'name': '架空历史', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 71, 'name': '前世今生', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 120, 'name': '文艺', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
    {'id': 72, 'name': '科幻小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
    {'id': 73, 'name': '网游小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
    {'id': 74, 'name': '灵异恐怖', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
    {'id': 75, 'name': '推理小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
    {'id': 76, 'name': '纯爱浪漫', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
    {'id': 77, 'name': '耽美', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
    {'id': 78, 'name': '同人', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
    {'id': 79, 'name': '百合', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
    {'id': 1001, 'name': '名人传记', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 1003, 'name': '经典名著', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 1005, 'name': '传统文化', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 1007, 'name': '人际社交', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 2012, 'name': '衍生言情', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
    {'id': 2013, 'name': '衍生纯爱', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
    {'id': 2014, 'name': '武侠仙侠', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
    {'id': 2015, 'name': '古风历史', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 2016, 'name': '青春恋爱', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
    {'id': 2017, 'name': '脑洞幻想', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
    {'id': 2018, 'name': '游戏悬疑', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
]
class BookSpider(baseSpider):
    """Spider for the ``zy_kanshu`` source served from hezuo.lunjian.com.

    Provides the URL builders and JSON response parsers for the book list,
    book detail, chapter list and chapter content endpoints.

    NOTE(review): presumably these methods are hooks called by ``baseSpider``
    (not visible in this file) -- confirm against the base class.
    """

    name = 'kanshu'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the endpoint name by the
    # ``get_*_url`` methods below.
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # Computed once when the class body runs, so the date in the file name
        # is the date the module was loaded.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_start_url(self):
        """Return the URL of the ``bookLists`` endpoint."""
        return self.base_url.format('bookLists')

    def bid_list_result(self, response):
        """Parse the book-list response into ``[{'id': ...}, ...]``.

        Returns an empty list when the decoded body is ``None`` or carries no
        ``data`` value, matching the guards used by the other parsers.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return []
        return [{'id': item['id']} for item in payload['data']]

    def get_book_info_url(self, bid):
        """Return the ``BookDetail`` URL for book id ``bid``."""
        return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Parse the book-detail response into a flat dict of book fields.

        Returns ``None`` when the decoded body is ``None`` or has no ``data``.
        ``category_id`` is the ``id`` of whatever ``get_category_by_name``
        returns for the book's category, or 0 when that lookup returns ``None``.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return None
        book = payload['data']
        category_info = get_category_by_name(book['category'])
        category_id = category_info['id'] if category_info is not None else 0
        return {
            'bid': book['id'],
            'name': book['bookTitle'],
            'author': book['author'],
            'intro': book['introduction'],
            'cover': book['cover'],
            'keyword': book['labels'],
            'status': book['state'],
            'category': book['category'],
            'category_id': category_id,
            'channel': book['channelId'],
        }

    def get_chapter_list_url(self, bid):
        """Return the ``ChapterLists`` URL for book id ``bid``."""
        return self.base_url.format('ChapterLists') + '&bookid={}'.format(bid)

    def chapter_list_result(self, response):
        """Parse the chapter-list response into a list of chapter dicts.

        ``sequence`` is the 1-based position of the chapter in the response,
        ``is_vip`` is normalised to 0/1 and ``size`` is always 0 here.
        Returns an empty list when the decoded body is ``None`` or has no
        ``data``.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return []
        return [
            {
                'source_chapter_id': chapter['id'],
                'name': chapter['title'],
                'sequence': sequence,
                'is_vip': 1 if chapter['isVip'] else 0,
                'size': 0,
                'recent_update_at': chapter['lastUpdateTime'],
            }
            for sequence, chapter in enumerate(payload['data'], 1)
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the ``ChapterContent`` URL for book ``bid`` and chapter ``cid``."""
        return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)

    def chapter_content_result(self, response):
        """Parse the chapter-content response into ``{'content', 'size'}``.

        ``size`` is the character length of the content. When the decoded body
        is ``None`` or has no ``data``, returns ``{'content': ''}`` (without a
        ``size`` key, as before) instead of raising.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return {'content': ''}
        content = payload['data']['content']
        return {
            'content': content,
            'size': len(content),
        }
class BookInfoFixSpider(fixBookInfoSpider):
    """Book-info-only spider for the ``zy_kanshu`` source on hezuo.lunjian.com.

    Provides the URL builders and JSON response parsers for the book list and
    book detail endpoints.

    NOTE(review): presumably these methods are hooks called by
    ``fixBookInfoSpider`` (not visible in this file) -- confirm against the
    base class.
    """

    name = 'kanshubookinfofix'
    allowed_domains = ['hezuo.lunjian.com']
    source = 'zy_kanshu'
    source_name = '看书'
    source_id = 19
    # The ``{}`` placeholder is filled with the endpoint name by the
    # ``get_*_url`` methods below.
    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
    custom_settings = {
        'DOWNLOAD_DELAY': 0.01,
        'SOURCE': source,
        # Computed once when the class body runs, so the date in the file name
        # is the date the module was loaded.
        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
    }

    def get_start_url(self):
        """Return the URL of the ``bookLists`` endpoint."""
        return self.base_url.format('bookLists')

    def bid_list_result(self, response):
        """Parse the book-list response into ``[{'id': ...}, ...]``.

        Returns an empty list when the decoded body is ``None`` or carries no
        ``data`` value, matching the guard used by ``book_info_result``.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return []
        return [{'id': item['id']} for item in payload['data']]

    def get_book_info_url(self, bid):
        """Return the ``BookDetail`` URL for book id ``bid``."""
        return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)

    def book_info_result(self, response):
        """Parse the book-detail response into a flat dict of book fields.

        Returns ``None`` when the decoded body is ``None`` or has no ``data``.
        ``category_id`` is the ``id`` of whatever ``get_category_by_name``
        returns for the book's category, or 0 when that lookup returns ``None``.
        """
        payload = json.loads(response.text)
        if payload is None or payload.get('data') is None:
            return None
        book = payload['data']
        category_info = get_category_by_name(book['category'])
        category_id = category_info['id'] if category_info is not None else 0
        return {
            'bid': book['id'],
            'name': book['bookTitle'],
            'author': book['author'],
            'intro': book['introduction'],
            'cover': book['cover'],
            'keyword': book['labels'],
            'status': book['state'],
            'category': book['category'],
            'category_id': category_id,
            'channel': book['channelId'],
        }
|