zz пре 3 година
родитељ
комит
45e5379173

+ 15 - 5
ydyspider/baseSpider.py

@@ -16,6 +16,15 @@ def md5(token):
     return m.hexdigest()
 
 
+def sign(param, key):
+    param = sorted(param.items(), key=lambda x: x[0])
+    string = ''
+    for item in param:
+        string = string + '{}={}&'.format(str(item[0]), str(item[1]))
+    string = string + 'key={}'.format(key)
+    return md5(string).upper()
+
+
 def sha1(token):
     m = hashlib.sha1()
     m.update(token.encode('utf-8'))
@@ -86,13 +95,14 @@ class baseSpider(scrapy.Spider):
         book_info_item['keyword'] = result['keyword']
         book_info_item['category_id'] = 0 if result.get('category_id') is None else result.get('category_id')
         book_info_item['status'] = result['status']
-        book_info_item['chapter_count'] = 0
+        book_info_item['chapter_count'] = 0 if result.get('chapter_count') is None else result.get('chapter_count')
         book_info_item['first_cid'] = 0
         book_info_item['last_cid'] = 0
-        book_info_item['size'] = 0
-        book_info_item['last_chapter'] = ''
-        book_info_item['category_name'] = result['category']
-        book_info_item['source'] = self.source
+        book_info_item['size'] = 0 if result.get('size') is None else result.get('size')
+        book_info_item['last_chapter'] = '' if result.get('last_chapter') is None else result.get('last_chapter')
+        book_info_item['category_name'] = result['category_name']
+        book_info_item['source_name'] = self.source
+        book_info_item['gender'] = 0 if result.get('gender') is None else result.get('gender')
         book_info_item['updated_at'] = now
         book_info_item['created_at'] = now
         bid = self.mysqlHelper.insert_book(book_info_item)

+ 3 - 1
ydyspider/items.py

@@ -13,9 +13,10 @@ class YdyspiderItem(scrapy.Item):
     # name = scrapy.Field()
     pass
 
+
 class BookInfoItem(scrapy.Item):
     source_bid = scrapy.Field()
-    source = scrapy.Field()
+    source_name = scrapy.Field()
     name = scrapy.Field()
     author = scrapy.Field()
     intro = scrapy.Field()
@@ -29,6 +30,7 @@ class BookInfoItem(scrapy.Item):
     size = scrapy.Field()
     last_chapter = scrapy.Field()
     category_name = scrapy.Field()
+    gender = scrapy.Field()
     updated_at = scrapy.Field()
     created_at = scrapy.Field()
 

+ 0 - 0
ydyspider/spiders/zhaoniu/__init__.py


+ 125 - 0
ydyspider/spiders/zhaoniu/zhaoniu.py

@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+
+from ydyspider.baseSpider import baseSpider
+from ydyspider.baseSpider import baseUpdateSpider
+from ydyspider.baseSpider import fixChapterSpider
+from ydyspider.baseSpider import sign
+from urllib.parse import urlencode
+import json
+import time
+
+allowed_domains = ['book.zhuishuyun.com']  # host of the API; reused as allowed_domains on the classes below
+cp = 'cp'  # value sent as the ``cp`` request parameter
+key = ''  # passed to sign() for every request; NOTE(review): empty here, presumably a placeholder - confirm
+base_url = 'http://book.zhuishuyun.com/api/book/{}?'  # ``{}`` receives the endpoint name; the query string is appended after ``?``
+source = 'zhaoniu'  # exposed as ``source`` on the classes below and as the SOURCE setting
+
+
+class zhaoniu(object):
+    allowed_domains = allowed_domains
+    base_url = base_url
+    source = source
+
+    def get_start_url(self):
+        param = {'cp': cp, 'is_enable': 1, 'page': 1, 'timestamp': int(time.time())}
+        param['sign'] = sign(param, key)
+        return self.base_url.format('getBookList') + urlencode(param)
+
+    def bid_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return []
+        result_list = []
+        for item in result['data']:
+            result_list.append({'id': item['articleid']})
+        return result_list
+
+    def get_book_info_url(self, bid):
+        param = {'cp': cp, 'book_id': bid, 'timestamp': int(time.time())}
+        param['sign'] = sign(param, key)
+        return self.base_url.format('getBook') + urlencode(param)
+
+    def book_info_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return None
+        result = result['data']
+        return {
+            'bid': result['book_id'], 'name': result['book_name'], 'author': result['book_author'],
+            'intro': result['book_summary'], 'cover': result['cover_url'], 'keyword': result['book_roles'],
+            'status': result['book_end_status'], 'category_name': result['book_category'],
+            'size': result['book_word_count'], 'last_chapter': result['last_chapter'],
+            'chapter_count': result['book_chapter_total'],
+            'gender': result['book_category_pid']
+        }
+
+    def get_chapter_list_url(self, bid):
+        param = {'cp': cp, 'book_id': bid, 'timestamp': int(time.time()), 'page': 1}
+        param['sign'] = sign(param, key)
+        return self.base_url.format('getCatalog') + urlencode(param)
+
+    def chapter_list_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return []
+
+        result_list = []
+        for chapter_item in result['data']:
+            result_list.append({
+                'source_chapter_id': chapter_item['chapterid'], 'name': chapter_item['chaptername'],
+                'sequence': chapter_item['chapterorder'], 'is_vip': chapter_item['isvip'],
+                'size': chapter_item['words'], 'recent_update_at': chapter_item['lastupdate']
+            })
+        return result_list
+
+    def get_chapter_content_url(self, bid, cid):
+        param = {'cp': cp, 'chapter_id': cid, 'timestamp': int(time.time())}
+        param['sign'] = sign(param, key)
+        return self.base_url.format('getChapter') + urlencode(param)
+
+    def chapter_content_result(self, response):
+        result = json.loads(response.text)
+        if result is None:
+            return {'content': ''}
+        return {
+            'content': result['data']['content']
+        }
+
+
+class zhaoniuSpider(zhaoniu, baseSpider):
+    name = 'zhaoniu'
+    allowed_domains = allowed_domains
+    base_url = base_url
+    source = source
+
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.05,
+        'SOURCE': source,
+        'LOG_FILE': 'ydyspider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class zhaoniuupdateSpider(zhaoniu, baseUpdateSpider):
+    name = 'zhaoniuupdate'
+    allowed_domains = allowed_domains
+    base_url = base_url
+    source = source
+
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.05,
+        'SOURCE': source,
+        'LOG_FILE': 'ydyspider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+
+class zhaoniufixSpider(zhaoniu, fixChapterSpider):
+    name = 'zhaoniufix'
+    allowed_domains = allowed_domains
+    base_url = base_url
+    source = source
+
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.05,
+        'SOURCE': source,
+        'LOG_FILE': 'ydyspider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }