瀏覽代碼

kanshuwang category fix command

zhaoyang 2 年之前
父節點
當前提交
ac174e407a
共有 2 個文件被更改,包括 52 次插入和 3 次删除
  1. 2 2
      content_spider/mysqlHelper.py
  2. 50 1
      content_spider/spiders/kanshu/book.py

+ 2 - 2
content_spider/mysqlHelper.py

@@ -127,11 +127,11 @@ class MysqlHelper(object):
         self.__conn.commit()
 
     def update_book_info(self, book_info):
-        sql = 'update zy_books set `name`=%s,author=%s,intro=%s,cover=%s,gender=%s,category_name=%s where id =%s'
+        sql = 'update zy_books set `name`=%s,author=%s,intro=%s,cover=%s,channel=%s,category_name=%s,category_id=%s where id =%s'  # %s order must match the tuple passed to execute() below
         with self.__conn.cursor() as cursor:
             cursor.execute(sql, (
                 book_info['name'], book_info['author'], book_info['intro'], book_info['cover'],
-                book_info['gender'], book_info['category'], int(book_info['bid'])))
+                book_info['channel'], book_info['category'],book_info['category_id'], int(book_info['bid'])))
         self.__conn.commit()
 
     def update_book_status(self, bid,status):

+ 50 - 1
content_spider/spiders/kanshu/book.py

@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from content_spider.baseSpider import baseSpider
+from content_spider.baseSpider import fixBookInfoSpider
 from content_spider.Util import get_category_by_name
 import json
 import time
@@ -135,10 +136,13 @@ class BookSpider(baseSpider):
             return None
         result = result['data']
         category_info = get_category_by_name(result['category'])
+        category_id = 0
+        if category_info is not None:
+               category_id =  category_info['id']
         return {
             'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
             'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
-            'status': result['state'], 'category': result['category'],'category_id':category_info['id'],
+            'status': result['state'], 'category': result['category'],'category_id':category_id,
             'channel': result['channelId']
         }
 
@@ -174,3 +178,48 @@ class BookSpider(baseSpider):
             'size': len(result['data']['content'])
         }
 
+
+class BookInfoFixSpider(fixBookInfoSpider):
+    """URL builders and response parsers for kanshu book info, plugged into fixBookInfoSpider."""
+    name = 'kanshubookinfofix'
+    allowed_domains = ['hezuo.lunjian.com']
+    source = 'zy_kanshu'
+    source_name = '看书'
+    source_id = 19
+    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'

+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }

+    def get_start_url(self):
+        """Return the URL of the ``bookLists`` endpoint."""
+        return self.base_url.format('bookLists')

+    def bid_list_result(self, response):
+        """Parse the book-list response into ``[{'id': ...}, ...]``; ``[]`` when it carries no data."""
+        payload = json.loads(response.text)
+        if payload is None or payload.get('data') is None:  # same guard as book_info_result
+            return []
+        return [{'id': item['id']} for item in payload['data']]

+    def get_book_info_url(self, bid):
+        """Return the ``BookDetail`` endpoint URL for book id ``bid``."""
+        return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)

+    def book_info_result(self, response):
+        """Parse a ``BookDetail`` response into a book-info dict, or ``None`` when it carries no data."""
+        result = json.loads(response.text)
+        if result is None or result.get('data') is None:
+            return None
+        result = result['data']
+        category_info = get_category_by_name(result['category'])
+        category_id = 0 if category_info is None else category_info['id']  # 0 instead of failing on a None lookup
+        return {
+            'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
+            'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
+            'status': result['state'], 'category': result['category'], 'category_id': category_id,
+            'channel': result['channelId']
+        }