zhaoyang 2 年之前
父節點
當前提交
dba6789d01

+ 6 - 0
README.md

@@ -56,3 +56,9 @@
   更新完结状态:  scrapy crawl aiyouhuyubookinfo
   覆盖命令:  scrapy crawl aiyouhuyufix -a bid=bid1,bid2
 
+## yuyuedu娱阅读:
+文件目录: content_spider/spiders/yuyuedu
+  采集命令: scrapy crawl yuyuedu
+  更新命令: scrapy crawl yuyueduupdate
+  更新完结状态:  scrapy crawl yuyuedubookinfo
+  覆盖命令:  scrapy crawl yuyuedufix -a bid=bid1,bid2

+ 0 - 0
content_spider/spiders/shuhai/__init__.py


+ 0 - 0
content_spider/spiders/shuhai/shuhai.py


+ 0 - 0
content_spider/spiders/yuyuedu/__init__.py


+ 190 - 0
content_spider/spiders/yuyuedu/yuyuedu.py

@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+
+from content_spider.baseSpider import baseSpider
+from content_spider.baseSpider import baseUpdateSpider
+from content_spider.baseSpider import fixChapterSpider
+from content_spider.baseSpider import baseUpdateBookStatusSpider
+import time
+import json
+
+
+# Identity of this content source; the spider classes in this module reuse
+# these values as class attributes and settings.
+name = 'yuyuedu'
+# Every URL built from ``base_url`` targets api.xingheread.cn, so that host
+# must be listed here: Scrapy's offsite filtering drops callback requests to
+# hosts outside ``allowed_domains``.  The previously listed host is kept so
+# nothing that was allowed before becomes filtered.
+allowed_domains = ['api.yuread.cn', 'api.xingheread.cn']
+source = 'zy_yuyuedu'
+source_name = 'yuyuedu娱阅读'
+source_id = 36
+# Endpoint template; ``{}`` is filled with the API method name
+# (bookList / bookInfo / chapterList / content).
+base_url = 'http://api.xingheread.cn/zhiyu/{}'
+
+
+# Lookup table from the source's category id ("sid") to the local
+# channel_id / category_name / category_id.  ``get_category`` matches on
+# "sid" and falls back to the first entry.
+# NOTE(review): "pid" looks like the parent "sid" ("0" for the two top-level
+# channels) -- confirm against the upstream API document.
+category =  [
+{"sid": "1","title": "男生频道","pid": "0","channel_id":1,'category_name': '武侠仙侠', 'category_id': 21}, 
+{"sid": "2","title": "女生频道","pid": "0","channel_id":2,'category_name': '婚恋情感', 'category_id': 98}, 
+{"sid": "3","title": "现代都市","pid": "1","channel_id":1,'category_name': '都市爱情', 'category_id': 54}, 
+{"sid": "4","title": "玄幻仙侠","pid": "1","channel_id":1,'category_name': '武侠仙侠', 'category_id': 21}, 
+{"sid": "5","title": "历史竞技","pid": "1","channel_id":1,'category_name': '历史穿越', 'category_id': 14}, 
+{"sid": "6","title": "悬疑灵异","pid": "1","channel_id":1,'category_name': '灵异恐怖', 'category_id': 81}, 
+{"sid": "7","title": "科幻末世","pid": "1","channel_id":1,'category_name': '玄幻奇幻', 'category_id': 23}, 
+{"sid": "8","title": "现代言情","pid": "2","channel_id":2,'category_name': '婚恋情感', 'category_id': 98}, 
+{"sid": "9","title": "穿越古言","pid": "2","channel_id":2,'category_name': '穿越重生', 'category_id': 83}, 
+{"sid": "10","title": "玄幻奇幻","pid": "2","channel_id":2,'category_name': '东方玄幻', 'category_id': 96}, 
+{"sid": "11","title": "灵异言情","pid": "2","channel_id":2,'category_name': '鬼夫言情', 'category_id': 115}, 
+{"sid": "12","title": "唯美同人","pid": "2","channel_id":2,'category_name': '耽美同人', 'category_id': 106}, 
+{"sid": "13","title": "官场商场","pid": "3","channel_id":1,'category_name': '官场沉浮', 'category_id': 55}, 
+{"sid": "14","title": "热血青春","pid": "3","channel_id":1,'category_name': '青春爱情', 'category_id': 94}, 
+{"sid": "15","title": "异术超能","pid": "3","channel_id":1,'category_name': '现代修真', 'category_id': 68}, 
+{"sid": "16","title": "热血都市","pid": "3","channel_id":1,'category_name': '都市爱情', 'category_id': 54}, 
+{"sid": "17","title": "社会百态","pid": "3","channel_id":1,'category_name': '其他作品', 'category_id': 127}, 
+{"sid": "18","title": "乡村生活","pid": "3","channel_id":1,'category_name': '其他作品', 'category_id': 127}, 
+{"sid": "19","title": "西方玄幻","pid": "4","channel_id":1,'category_name': '西方玄幻', 'category_id': 22}, 
+{"sid": "20","title": "东方玄幻","pid": "4","channel_id":1,'category_name': '武侠仙侠', 'category_id': 21}, 
+{"sid": "21","title": "异世修真","pid": "4","channel_id":1,'category_name': '玄幻奇幻', 'category_id': 23}, 
+{"sid": "22","title": "神话魔法","pid": "4","channel_id":1,'category_name': '玄幻奇幻', 'category_id': 23}, 
+{"sid": "23","title": "战争幻想","pid": "5","channel_id":1,'category_name': '玄幻奇幻', 'category_id': 23}, 
+{"sid": "24","title": "历史传记","pid": "5","channel_id":1,'category_name': '特种军旅', 'category_id': 51}, 
+{"sid": "25","title": "架空历史","pid": "5","channel_id":1,'category_name': '历史穿越', 'category_id': 14}, 
+{"sid": "26","title": "体育竞技","pid": "5","channel_id":1,'category_name': '游戏竞技', 'category_id': 19}, 
+{"sid": "27","title": "虚拟网游","pid": "5","channel_id":1,'category_name': '游戏竞技', 'category_id': 19}, 
+{"sid": "28","title": "风水秘术","pid": "6","channel_id":1,'category_name': '灵异恐怖', 'category_id': 81}, 
+{"sid": "29","title": "悬疑推理","pid": "6","channel_id":1,'category_name': '灵异恐怖', 'category_id': 81}, 
+{"sid": "30","title": "恐怖惊悚","pid": "6","channel_id":1,'category_name': '灵异恐怖', 'category_id': 81}, 
+{"sid": "31","title": "末世危机","pid": "7","channel_id":1,'category_name': '西方玄幻', 'category_id': 22}, 
+{"sid": "32","title": "位面空间","pid": "7","channel_id":1,'category_name': '西方玄幻', 'category_id': 22}, 
+{"sid": "33","title": "星际幻想","pid": "7","channel_id":1,'category_name': '西方玄幻', 'category_id': 22}, 
+{"sid": "34","title": "科幻机甲","pid": "7","channel_id":1,'category_name': '西方玄幻', 'category_id': 22}, 
+{"sid": "35","title": "婚恋职场","pid": "8","channel_id":2,'category_name': '婚恋情感', 'category_id': 98}, 
+{"sid": "36","title": "总裁豪门","pid": "8","channel_id":2,'category_name': '豪门总裁', 'category_id': 88}, 
+{"sid": "37","title": "现代重生","pid": "8","channel_id":2,'category_name': '都市异能', 'category_id': 118}, 
+{"sid": "38","title": "社会女性","pid": "8","channel_id":2,'category_name': '婚恋情感', 'category_id': 98}, 
+{"sid": "39","title": "青春校园","pid": "8","channel_id":2,'category_name': '青春校园', 'category_id': 104}, 
+{"sid": "40","title": "技能女强","pid": "9","channel_id":2,'category_name': '都市职场', 'category_id': 116}, 
+{"sid": "41","title": "宫斗宅斗","pid": "9","channel_id":2,'category_name': '宫斗宅斗', 'category_id': 120}, 
+{"sid": "42","title": "经商种田","pid": "9","channel_id":2,'category_name': '经商种田', 'category_id': 84}, 
+{"sid": "43","title": "民国旧影","pid": "9","channel_id":2,'category_name': '清穿民国', 'category_id': 122}, 
+{"sid": "44","title": "女尊天下","pid": "9","channel_id":2,'category_name': '女尊王朝', 'category_id': 123}, 
+{"sid": "45","title": "魔法幻情","pid": "10","channel_id":2,'category_name': '古典仙侠', 'category_id': 97}, 
+{"sid": "46","title": "玄幻女强","pid": "10","channel_id":2,'category_name': '东方玄幻', 'category_id': 96}, 
+{"sid": "47","title": "仙侠修真","pid": "10","channel_id":2,'category_name': '东方玄幻', 'category_id': 96}, 
+{"sid": "48","title": "科幻末世","pid": "10","channel_id":2,'category_name': '东方玄幻', 'category_id': 96}, 
+{"sid": "49","title": "空间网游","pid": "10","channel_id":2,'category_name': '游戏', 'category_id': 119}, 
+{"sid": "50","title": "现代灵异","pid": "11","channel_id":2,'category_name': '女生灵异', 'category_id': 95}, 
+{"sid": "51","title": "民国灵异","pid": "11","channel_id":2,'category_name': '鬼夫言情', 'category_id': 115}, 
+{"sid": "52","title": "古代灵异","pid": "11","channel_id":2,'category_name': '鬼夫言情', 'category_id': 115}, 
+{"sid": "53","title": "现代耽美","pid": "12","channel_id":2,'category_name': '耽美同人', 'category_id': 106}, 
+{"sid": "54","title": "古代耽美","pid": "12","channel_id":2,'category_name': '耽美同人', 'category_id': 106}, 
+{"sid": "55","title": "明星同人","pid": "12","channel_id":2,'category_name': '耽美同人', 'category_id': 106}, 
+{"sid": "56","title": "动漫同人","pid": "12","channel_id":2,'category_name': '耽美同人', 'category_id': 106}, 
+{"sid": "57","title": "短篇","pid": "3","channel_id":1,'category_name': '轻小说', 'category_id': 126}, 
+{"sid": "58","title": "短篇","pid": "8","channel_id":2,'category_name': '其他', 'category_id': 107}
+]
+
+
+
+def get_category(sort_id):
+    """Return the ``category`` entry whose 'sid' equals ``str(sort_id)``.
+
+    When no entry matches, the first entry of ``category`` is returned as
+    the fallback.
+    """
+    wanted = str(sort_id)
+    return next(
+        (entry for entry in category if entry['sid'] == wanted),
+        category[0],
+    )
+
+
+class yuyueduProcess():
+    """URL builders and JSON response parsers for the yuyuedu API.
+
+    The ``get_*_url`` methods format ``base_url`` with an endpoint name and
+    query string; the ``*_result`` methods decode ``response.text`` as JSON
+    and reshape the payload's 'data' member into plain dicts/lists.
+    """
+    name = name
+    allowed_domains = allowed_domains
+    source = source
+    source_name = source_name
+    source_id = source_id
+
+    def get_start_url(self):
+        """Return the URL of the book-list endpoint."""
+        return base_url.format('bookList')
+
+    def bid_list_result(self, response):
+        """Return ``[{'id': bookid}, ...]`` from a book-list response.
+
+        An empty list is returned when the payload is null or has no 'data'.
+        """
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return []
+        return [{'id': item['bookid']} for item in result['data']]
+
+    def get_book_info_url(self, bid):
+        """Return the book-info URL for book id ``bid``."""
+        return base_url.format('bookInfo') + '?bookId={}'.format(bid)
+
+    def book_info_result(self, response):
+        """Map a book-info response onto the local book fields.
+
+        The category, category id and channel come from ``get_category``
+        applied to the payload's 'sort_id'.
+
+        Raises:
+            KeyError/TypeError: if 'data' or one of the read fields is
+                missing from the payload.
+        """
+        result = json.loads(response.text)
+        result = result['data']
+        category_info = get_category(result['sort_id'])
+        return {
+            'bid': result['bookid'], 'name': result['title'], 'author': result['author'],
+            'intro': result['description'], 'cover': result['cover'], 'keyword': result['tag'],
+            'status':  result['is_finish'], 'category': category_info['category_name'],'category_id':category_info['category_id'],
+            'channel': category_info['channel_id']
+        }
+
+    def get_chapter_list_url(self, bid):
+        """Return the chapter-list URL for book id ``bid``."""
+        return base_url.format('chapterList') + '?bookId={}'.format(bid)
+
+    def chapter_list_result(self, response):
+        """Return one dict per chapter from a chapter-list response.
+
+        'sequence' is the 1-based position of the chapter in the payload
+        and 'size' is always 0 here.  An empty list is returned when the
+        payload is null or has no 'data'.
+        """
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return []
+
+        return [
+            {
+                'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
+                'sequence': sequence, 'is_vip': chapter_item['isvip'],
+                'size': 0, 'recent_update_at': chapter_item['update_time']
+            }
+            for sequence, chapter_item in enumerate(result['data'], start=1)
+        ]
+
+    def get_chapter_content_url(self, bid, cid):
+        """Return the chapter-content URL for book ``bid``, chapter ``cid``."""
+        return base_url.format('content') + '?bookId={}&chapterId={}'.format(bid, cid)
+
+    def chapter_content_result(self, response):
+        """Return ``{'content': text, 'size': len(text)}`` for a chapter.
+
+        A null payload, a payload without 'data', or a null 'content' all
+        yield empty content with size 0, matching the guards used by the
+        list parsers instead of raising on ``result['data']['content']``.
+        """
+        result = json.loads(response.text)
+        data = None if result is None else result.get('data')
+        if data is None:
+            return {'content': '', 'size': 0}
+
+        content = data.get('content') or ''
+        return {
+            'content': content,
+            'size':  len(content)
+        }
+
+    
+class yuyueduSpider(yuyueduProcess,baseSpider):
+    """Spider named 'yuyuedu': the yuyuedu parsers combined with ``baseSpider``.
+
+    The README in this commit lists ``scrapy crawl yuyuedu`` as the
+    collection command; the crawl logic itself lives in ``baseSpider``.
+    """
+    name = name
+    # LOG_FILE is built once, when this class body runs: log directory +
+    # this class's ``name`` + local date (YYYY-MM-DD) + '.log'.
+    custom_settings = dict(
+        DOWNLOAD_DELAY=0.1,
+        SOURCE=source,
+        LOG_FILE=''.join((
+            'content_spider/log/',
+            name,
+            time.strftime("%Y-%m-%d", time.localtime()),
+            '.log',
+        )),
+    )
+
+
+class yuyueduUpdateSpider(yuyueduProcess,baseUpdateSpider):
+    """Spider named 'yuyueduupdate': yuyuedu parsers on ``baseUpdateSpider``.
+
+    The README in this commit lists ``scrapy crawl yuyueduupdate`` as the
+    update command; the update logic itself lives in ``baseUpdateSpider``.
+    """
+    name = name + "update"
+    # ``name`` below is the class-level value just assigned, so the log file
+    # is prefixed with 'yuyueduupdate' followed by the local date.
+    custom_settings = dict(
+        DOWNLOAD_DELAY=0.1,
+        SOURCE=source,
+        LOG_FILE=''.join((
+            'content_spider/log/',
+            name,
+            time.strftime("%Y-%m-%d", time.localtime()),
+            '.log',
+        )),
+    )
+
+
+    
+class yuyueduFixSpider(yuyueduProcess,fixChapterSpider):
+    """Spider named 'yuyuedufix': yuyuedu parsers on ``fixChapterSpider``.
+
+    The README in this commit lists
+    ``scrapy crawl yuyuedufix -a bid=bid1,bid2`` as the overwrite command;
+    the handling of ``bid`` lives in ``fixChapterSpider``.
+    """
+    name = name + 'fix'
+    # ``name`` below is the class-level value just assigned, so the log file
+    # is prefixed with 'yuyuedufix' followed by the local date.
+    custom_settings = dict(
+        DOWNLOAD_DELAY=0.1,
+        SOURCE=source,
+        LOG_FILE=''.join((
+            'content_spider/log/',
+            name,
+            time.strftime("%Y-%m-%d", time.localtime()),
+            '.log',
+        )),
+    )
+
+
+class yuyueduBookInfoSpider(yuyueduProcess,baseUpdateBookStatusSpider):
+    """Spider named 'yuyuedubookinfo' on ``baseUpdateBookStatusSpider``.
+
+    The README in this commit lists ``scrapy crawl yuyuedubookinfo`` as the
+    command that updates the finished status of books; that logic lives in
+    ``baseUpdateBookStatusSpider``.
+    """
+    name = name + "bookinfo"
+    # ``name`` below is the class-level value just assigned, so the log file
+    # is prefixed with 'yuyuedubookinfo' followed by the local date.
+    custom_settings = dict(
+        DOWNLOAD_DELAY=0.1,
+        SOURCE=source,
+        LOG_FILE=''.join((
+            'content_spider/log/',
+            name,
+            time.strftime("%Y-%m-%d", time.localtime()),
+            '.log',
+        )),
+    )

文件差異過大導致無法顯示
+ 868 - 0
content_spider/spiders/yuyuedu/公用接口 (1).html


+ 21 - 61
content_spider/temp_test.py

@@ -13,69 +13,29 @@ from xml.dom.minidom import parseString
 import time
 import xlrd
 
-df = xlrd.open_workbook("./11111.xls")
-table=df.sheets()[0]
+#df = xlrd.open_workbook("./11111.xls")
+#table=df.sheets()[0]
 
 
 result = []
 
-for i in range(1,table.nrows):
-    row = table.row_values(i)
-    if len(row) <= 0:
-        break
-    name = row[1]
-    cid = int(row[0])
-    item = {"id":cid,"name":name}
-    if cid < 2000:
+#for i in range(1,table.nrows):
+#    row = table.row_values(i)
+#    if len(row) <= 0:
+#        break
+#    name = row[1]
+#    cid = int(row[0])
+#    item = {"id":cid,"name":name}
+
+
+
+
+category = [
+    ""
+];
+
+for item in category:
+    if item['sid'] == "1":
         item['channel_id'] = 1
-    else:
-        item['channel_id'] = 2
-    
-    if cid == 1001:
-        item['category_name'] = "都市爱情"
-        item['category_id'] = 54
-    if cid == 1002 or cid == 1003 or cid == 1012:
-        item['category_name'] = "武侠仙侠"
-        item['category_id'] = 21
-    if cid == 1004 or cid == 1009:
-        item['category_name'] = "历史穿越"
-        item['category_id'] = 14
-    if cid == 1005 or cid == 1016:
-        item['category_name'] = "灵异恐怖"
-        item['category_id'] = 81
-    if cid == 1006 or cid == 1014:
-        item['category_name'] = "游戏竞技"
-        item['category_id'] = 19
-    if cid == 1007 or cid == 1008:
-        item['category_name'] = "玄幻奇幻"
-        item['category_id'] = 23
-    if cid == 1019 or cid == 1020 or cid == 1021:
-        item['category_name'] = "其他作品"
-        item['category_id'] = 127
-    if cid == 2001:
-        item['category_name'] = "婚恋情感"
-        item['category_id'] = 87
-    if cid == 2002 or cid == 2011:
-        item['category_name'] = "穿越重生"
-        item['category_id'] = 83
-    if cid == 2003 or cid == 2014 or cid == 2015 or cid  == 2016:
-        item['category_name'] = "东方玄幻"
-        item['category_id'] = 96
-    if cid == 2004:
-        item['category_name'] = "青春校园"
-        item['category_id'] = 104
-    if cid == 2007 or cid == 2013 or cid == 2019 or cid == 2020 or cid == 2021:
-        item['category_name'] = "其他"
-        item['category_id'] = 107
-    if cid == 2009:
-        item['category_name'] = "悬疑探险"
-        item['category_id'] = 113
-    if cid == 2012 or cid == 2017:
-        item['category_name'] = "青春纯爱"
-        item['category_id'] = 103
-    if cid == 2018:
-        item['category_name'] = "游戏"
-        item['category_id'] = 113
-    result.append(item)
-    
-print(result)
+    if item['sid'] == "2":
+        item['channel_id'] = 2