|
@@ -0,0 +1,262 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+
|
|
|
+from content_spider.baseSpider import baseSpider
|
|
|
+from content_spider.baseSpider import baseUpdateSpider
|
|
|
+from content_spider.baseSpider import fixChapterSpider
|
|
|
+from content_spider.baseSpider import baseUpdateBookStatusSpider
|
|
|
+from content_spider.Util import get_category_by_id
|
|
|
+from content_spider.Util import md5
|
|
|
+import time
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
# --- Identity of this content source -------------------------------------
# These values are re-exported as class attributes by the classes below.
name = "liuyue"
allowed_domains = ["api.6yxs.com"]
source = "zy_liuyue"
source_name = "liuyue六月"
source_id = 33

# --- Remote API ------------------------------------------------------------
# Root of every endpoint URL built in this module.
base_url = "http://api.6yxs.com/Open"
# Link to the provider's API document (not read by the code in this module).
doc = "https://docs.qq.com/doc/DZm1ic0RiWENWelNx?u=2ddd749abc4a466e8277cd2c98def851"
# Value sent as the `apiname` query parameter on every request.
apiname = "zhiyu"
|
|
|
+
|
|
|
+
|
|
|
# Category mapping table.  Each entry of `category` is a dict with exactly the
# keys listed in _CATEGORY_FIELDS, in that order.  Entries are looked up by
# their 'name' value; note that some names occur more than once (for example
# '青春校园' appears with id 19 and id 37), so a name lookup that scans from the
# top finds the earlier row.
# NOTE(review): 'name'/'pname'/'id' look like the provider's sub-category,
# parent category and category id, while the 'my_*' and 'ncategory_id' values
# look like local category ids - confirm against the API document.
_CATEGORY_FIELDS = (
    'channel_id', 'ncategory_id', 'name', 'my_category_name', 'my_category_id', 'pname', 'id',
)

_CATEGORY_ROWS = (
    (1, 21, '东方玄幻', '玄幻仙侠', 1, '玄幻奇幻', 13),
    (1, 22, '西方玄幻', '玄幻仙侠', 1, '玄幻奇幻', 14),
    (1, 21, '传统武侠', '玄幻仙侠', 1, '武侠仙侠', 15),
    (1, 21, '古典仙侠', '玄幻仙侠', 1, '武侠仙侠', 16),
    (1, 54, '都市生活', '都市暧昧', 3, '都市职场', 17),
    (1, 68, '异能超能', '都市暧昧', 3, '都市职场', 18),
    (1, 67, '青春校园', '热血校园', 2, '都市职场', 19),
    (1, 55, '官场商战', '都市暧昧', 3, '都市职场', 20),
    (1, 14, '架空历史', '军事历史', 4, '历史军事', 21),
    (1, 14, '纪实历史', '军事历史', 4, '历史军事', 22),
    (1, 51, '军事战争', '军事历史', 4, '历史军事', 23),
    (1, 51, '军旅幻想', '军事历史', 4, '历史军事', 24),
    (1, 19, '游戏生涯', '游戏竞技', 7, '游戏竞技', 25),
    (1, 19, '游戏异界', '游戏竞技', 7, '游戏竞技', 26),
    (1, 19, '体育竞技', '游戏竞技', 7, '游戏竞技', 27),
    (1, 127, '未来世界', '其他', 124, '科幻灵异', 28),
    (1, 127, '末世危机', '其他', 124, '科幻灵异', 29),
    (1, 81, '推理侦探', '悬疑灵异', 8, '科幻灵异', 30),
    (1, 81, '悬疑探险', '悬疑灵异', 8, '科幻灵异', 31),
    (2, 88, '豪门世家', '现代言情', 87, '现代言情', 32),
    (2, 117, '娱乐明星', '现代言情', 87, '现代言情', 33),
    (2, 98, '婚恋情缘', '现代言情', 87, '现代言情', 34),
    (2, 103, '青春纯爱', '浪漫青春', 101, '浪漫青春', 35),
    (2, 105, '青春疼痛', '浪漫青春', 101, '浪漫青春', 36),
    (2, 104, '青春校园', '浪漫青春', 101, '浪漫青春', 37),
    (2, 83, '古代架空', '古代言情', 82, '古代言情', 38),
    (2, 120, '宫闱宅斗', '古代言情', 82, '古代言情', 39),
    (2, 84, '经商种田', '古代言情', 82, '古代言情', 40),
    (2, 110, '魔法幻情', '幻想言情', 93, '仙侠奇缘', 41),
    (2, 110, '玄幻言情', '幻想言情', 93, '仙侠奇缘', 42),
    (2, 111, '推理探险', '女生灵异', 92, '悬疑灵异', 43),
    (2, 112, '灵异鬼怪', '女生灵异', 92, '悬疑灵异', 44),
    (2, 106, '现代耽美', '耽美同人', 102, '耽美同人', 45),
    (2, 106, '古代耽美', '耽美同人', 102, '耽美同人', 46),
    (2, 106, '同人小说', '耽美同人', 102, '耽美同人', 47),
    (2, 107, '中国文学', '其他', 100, '经典文学', 49),
    (2, 107, '外国文学', '其他', 100, '经典文学', 50),
    (2, 107, '成功励志', '其他', 100, '经管励志', 56),
    (2, 107, '理财管理', '其他', 100, '经管励志', 57),
    (2, 107, '少儿读物', '其他', 100, '少儿教育', 58),
    (2, 107, '亲子教育', '其他', 100, '少儿教育', 59),
    (2, 107, '社会军事', '其他', 100, '人文社科', 60),
    (2, 107, '历史文化', '其他', 100, '人文社科', 61),
    (2, 107, '健康生活', '其他', 100, '人文社科', 62),
    (2, 107, '心理读物', '其他', 100, '人文社科', 63),
    (2, 107, '科普读物', '其他', 100, '人文社科', 64),
    (2, 107, '影视原著', '其他', 100, '文学艺术', 65),
    (2, 107, '人物传记', '其他', 100, '文学艺术', 66),
    (2, 107, '散文随笔', '其他', 100, '文学艺术', 67),
    (2, 107, '诗词歌赋', '其他', 100, '文学艺术', 68),
    (2, 107, '古代言情', '其他', 100, '出版小说', 69),
    (2, 107, '现代言情', '其他', 100, '出版小说', 70),
    (2, 107, '科幻未来', '其他', 100, '出版小说', 71),
    (2, 107, '悬疑推理', '其他', 100, '出版小说', 72),
    (2, 107, '武侠仙侠', '其他', 100, '出版小说', 73),
    (2, 107, '青春文学', '其他', 100, '出版小说', 74),
    (2, 107, '现实小说', '其他', 100, '出版小说', 75),
    (2, 107, '历史小说', '其他', 100, '出版小说', 76),
)

# Expand the compact rows into the list of dicts the rest of the module reads.
category = [dict(zip(_CATEGORY_FIELDS, _row)) for _row in _CATEGORY_ROWS]
|
|
|
+
|
|
|
+
|
|
|
def get_category(sub_name):
    """Return the first `category` entry whose 'name' equals *sub_name*.

    When no entry matches, a freshly built default mapping (the '豪门世家'
    row, id 32) is returned instead, so callers always receive a dict with
    the full set of category keys.
    """
    fallback = {
        'channel_id': 2,
        'ncategory_id': 88,
        'name': '豪门世家',
        'my_category_name': '现代言情',
        'my_category_id': 87,
        'pname': '现代言情',
        'id': 32,
    }
    matches = (entry for entry in category if entry['name'] == sub_name)
    return next(matches, fallback)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class liuyueProcess(object):
    """URL builders and JSON response parsers for the liuyue open API.

    The ``get_*_url`` methods build request URLs from the module-level
    ``base_url`` and ``apiname``.  The ``*_result`` methods decode a response
    body as JSON and read the data found under its ``'result'`` key.  The
    class keeps no per-instance state; it is combined with the base spider
    classes further down in this file.

    All ``*_result`` methods let ``json.loads`` errors propagate when the
    body is not valid JSON.
    """

    # Re-export the module-level source identity as class attributes.
    name = name
    allowed_domains = allowed_domains
    source = source
    source_name = source_name
    source_id = source_id

    @staticmethod
    def _load_result(response):
        """Decode ``response.text`` as JSON and return its 'result' member.

        Returns None when the decoded body is not a JSON object or has no
        'result' key, so callers can handle every "no data" shape the same way.
        """
        payload = json.loads(response.text)
        if not isinstance(payload, dict):
            return None
        return payload.get('result')

    def get_start_url(self):
        """Return the URL of the book-list endpoint."""
        return base_url + '/book?apiname=' + apiname

    def bid_list_result(self, response):
        """Parse the book-list response into ``[{'id': <bookid>}, ...]``.

        Returns an empty list when the body carries no (or an empty) result.
        """
        books = self._load_result(response)
        if not books:
            return []
        return [{'id': book['bookid']} for book in books]

    def get_book_info_url(self, bid):
        """Return the URL of the book-detail endpoint for book *bid*."""
        return base_url + '/bookInfo?apiname={}&bookid={}'.format(apiname, bid)

    def book_info_result(self, response):
        """Parse a book-detail response into the book-info dict.

        Returns None when the body carries no (or an empty) result.  The
        returned dict has the keys 'bid', 'name', 'author', 'intro', 'cover',
        'keyword', 'status', 'category_id', 'category' and 'channel'.

        Raises KeyError when the result lacks one of the book fields read
        below ('bookid', 'bookname', 'authorname', 'intro', 'bookpic',
        'tags', 'fullflag').
        """
        info = self._load_result(response)
        if not info:
            return None

        # 'category' is a comma-separated string whose third element is the
        # sub-category name used for the table lookup.  A missing or shorter
        # value yields None, which makes get_category() return its default
        # entry instead of raising IndexError here.
        path = (info.get('category') or '').split(',')
        sub_name = path[2] if len(path) > 2 else None

        category_info = get_category(sub_name)
        ncategory_id = category_info['ncategory_id']

        # Prefer the name reported by get_category_by_id(); fall back to the
        # table's own 'my_category_name' when that lookup returns None.
        sub_subscribe_info = get_category_by_id(ncategory_id)
        if sub_subscribe_info is None:
            sub_subscribe_name = category_info['my_category_name']
        else:
            sub_subscribe_name = sub_subscribe_info['category_name']

        return {
            'bid': info['bookid'], 'name': info['bookname'], 'author': info['authorname'],
            'intro': info['intro'], 'cover': info['bookpic'], 'keyword': info['tags'],
            'status': info['fullflag'],
            # NOTE(review): 'category_id' carries ncategory_id, not
            # my_category_id - kept as in the original; confirm it is intended.
            'category_id': ncategory_id,
            'category': sub_subscribe_name, 'channel': category_info['channel_id']
        }

    def get_chapter_list_url(self, bid):
        """Return the URL of the chapter-list endpoint for book *bid*."""
        return base_url + '/Chapter?apiname={}&bookid={}'.format(apiname, bid)

    def chapter_list_result(self, response):
        """Parse a chapter-list response into a list of chapter dicts.

        Each dict has the keys 'source_chapter_id', 'name', 'sequence',
        'is_vip', 'size' and 'recent_update_at'.  Returns an empty list when
        the body carries no (or an empty) result.
        """
        chapters = self._load_result(response)
        if not chapters:
            return []
        return [
            {
                'source_chapter_id': chapter['chapterid'], 'name': chapter['chaptername'],
                'sequence': chapter['chapterorder'], 'is_vip': chapter['vip'],
                'size': chapter['words'], 'recent_update_at': chapter['update_date']
            }
            for chapter in chapters
        ]

    def get_chapter_content_url(self, bid, cid):
        """Return the URL of the chapter-content endpoint for *bid* / *cid*."""
        return base_url + '/Content?apiname={}&bookid={}&chapterid={}'.format(apiname, bid, cid)

    def chapter_content_result(self, response):
        """Parse a chapter-content response into ``{'content': <text>}``.

        The content is an empty string when the body carries no usable
        result object or the result has no 'content' key.
        """
        body = self._load_result(response)
        if not isinstance(body, dict):
            return {'content': ''}
        return {'content': body.get('content', '')}
|
|
|
+
|
|
|
+
|
|
|
class liuyueSpider(liuyueProcess, baseSpider):
    """Combine the liuyue URL/parse logic with ``baseSpider``.

    Registered under the module-level ``name`` unchanged.
    """

    name = name

    # Built once, when the class body is executed: the log file name embeds
    # the spider name and the local date at that moment.
    custom_settings = dict(
        DOWNLOAD_DELAY=0.1,
        SOURCE=source,
        LOG_FILE='content_spider/log/' + name + time.strftime("%Y-%m-%d") + '.log',
    )
|
|
|
+
|
|
|
+
|
|
|
class liuyueUpdateSpider(liuyueProcess, baseUpdateSpider):
    """Combine the liuyue URL/parse logic with ``baseUpdateSpider``."""

    # The right-hand side reads the module-level `name`; from here on `name`
    # inside this class body is the suffixed value, so the log file below is
    # named after this spider.
    name = name + "update"

    # Built once, when the class body is executed (local date at that moment).
    custom_settings = dict(
        DOWNLOAD_DELAY=0.1,
        SOURCE=source,
        LOG_FILE='content_spider/log/' + name + time.strftime("%Y-%m-%d") + '.log',
    )
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class liuyueFixSpider(liuyueProcess, fixChapterSpider):
    """Combine the liuyue URL/parse logic with ``fixChapterSpider``."""

    # The right-hand side reads the module-level `name`; from here on `name`
    # inside this class body is the suffixed value, so the log file below is
    # named after this spider.
    name = name + 'fix'

    # Built once, when the class body is executed (local date at that moment).
    custom_settings = dict(
        DOWNLOAD_DELAY=0.1,
        SOURCE=source,
        LOG_FILE='content_spider/log/' + name + time.strftime("%Y-%m-%d") + '.log',
    )
|
|
|
+
|
|
|
+
|
|
|
class liuyueBookInfoSpider(liuyueProcess, baseUpdateBookStatusSpider):
    """Combine the liuyue URL/parse logic with ``baseUpdateBookStatusSpider``."""

    # The right-hand side reads the module-level `name`; from here on `name`
    # inside this class body is the suffixed value, so the log file below is
    # named after this spider.
    name = name + "bookinfo"

    # Built once, when the class body is executed (local date at that moment).
    custom_settings = dict(
        DOWNLOAD_DELAY=0.1,
        SOURCE=source,
        LOG_FILE='content_spider/log/' + name + time.strftime("%Y-%m-%d") + '.log',
    )
|