Browse Source

update status

zhaoyang 2 years ago
parent
commit
f5622c2343

+ 54 - 1
content_spider/baseSpider.py

@@ -195,7 +195,7 @@ class baseUpdateSpider(scrapy.Spider):
         book_list = self.mysqlHelper.get_need_update_book_list()
         if book_list is not None:
             for book in book_list:
-                url = self.get_chapter_list_url(book['copilot'])
+                url = self.get_chapter_list_url(book['cp_bid'])
                 meta = {'bid': book['id'], 'cp_bid': book['cp_bid']}
                 yield scrapy.Request(url, callback=self.parse_chapter_list, meta=meta)
 
@@ -463,3 +463,56 @@ class fixBookInfoSpider(scrapy.Spider):
 
     def book_info_result(self, response):
         raise NotImplementedError
+
+
+class updateBookStatusSpider(scrapy.Spider):
+    """Base spider that re-reads book info from a source and updates book status.
+
+    Subclasses set ``name``, ``source``, ``source_name`` and ``source_id`` and
+    implement ``get_book_info_url`` and ``book_info_result``.
+    """
+    name = ''
+    source = ''
+    source_name = ''
+    source_id = 0
+
+    def __init__(self, host, user, password, db, bid_list, stats):
+        """Create the MySQL helper for this source and remember ``bid_list``.
+
+        ``stats`` is accepted because ``from_crawler`` passes it; it is not
+        used by this class.
+        """
+        scrapy.Spider.__init__(self)
+        self.mysqlHelper = MysqlHelper(host=host, user=user, password=password, db=db,
+                                       source=self.source, source_id=self.source_id)
+        # NOTE(review): bid_list is stored but start_requests does not read it;
+        # confirm whether the ``bid`` argument is meant to restrict the crawl.
+        self.bid_list = bid_list
+
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        """Build the spider from crawler settings and the optional ``bid`` argument.
+
+        ``bid`` is a comma-separated string; when absent the list is empty.
+        """
+        settings = crawler.settings
+        host = settings.get('MYSQL_HOST')
+        user = settings.get('MYSQL_USER')
+        password = settings.get('MYSQL_PWD')
+        db = settings.get('MYSQL_DB')
+        bid = kwargs.get('bid')
+        bid_list = bid.split(',') if bid is not None else []
+        return cls(host=host, user=user, password=password, db=db, bid_list=bid_list, stats=crawler.stats)
+
+    def start_requests(self):
+        """Yield one book-info request per book returned by the MySQL helper."""
+        book_list = self.mysqlHelper.get_need_update_book_list()
+        if book_list is not None:
+            for book in book_list:
+                url = self.get_book_info_url(book['cp_bid'])
+                meta = {'bid': book['id'], 'cp_bid': book['cp_bid']}
+                # The response handler of this class is parse_book_info; the
+                # class has no parse_chapter_list method.
+                yield scrapy.Request(url, callback=self.parse_book_info, meta=meta)
+
+    def parse_book_info(self, response):
+        """Store the parsed status for the book when it equals 1.
+
+        Empty responses and ``None`` parse results are ignored. Nothing is
+        emitted to the item pipeline, so the callback always returns ``None``.
+        """
+        if response.text == '':
+            return None
+        result = self.book_info_result(response)
+        if result is None:
+            return None
+        status = result['status']
+        if int(status) == 1:
+            self.mysqlHelper.update_book_status(response.meta['bid'], status)
+        return None
+
+    def get_book_info_url(self, bid):
+        """Return the book-info URL for the source book id; subclasses implement this."""
+        raise NotImplementedError
+
+    def book_info_result(self, response):
+        """Parse a book-info response into a dict with a ``status`` key, or ``None``.
+
+        Subclasses implement this.
+        """
+        raise NotImplementedError
+
+
+# Alias for the name that content_spider/spiders/kanshu/bookupdate.py imports
+# and subclasses.
+baseUpdateBookStatusSpider = updateBookStatusSpider

+ 6 - 0
content_spider/mysqlHelper.py

@@ -133,6 +133,12 @@ class MysqlHelper(object):
                 book_info['gender'], book_info['category'], int(book_info['bid'])))
         self.__conn.commit()
 
+    def update_book_status(self, bid, status):
+        """Set ``zy_books.status`` to ``status`` for the row whose ``id`` is ``bid``.
+
+        The change is committed immediately.
+        """
+        # Bind the values as query parameters instead of formatting them into
+        # the SQL text; ``status`` originates from a remote API response.
+        # Assumes the driver uses the ``%s`` placeholder style (PyMySQL/MySQLdb).
+        sql = 'update zy_books set status=%s where id =%s'
+        with self.__conn.cursor() as cursor:
+            cursor.execute(sql, (status, bid))
+        self.__conn.commit()
+
 
     def re_sequence(self, bid):
         sql = '''

+ 34 - 0
content_spider/spiders/kanshu/bookupdate.py

@@ -2,6 +2,7 @@
 
 import random
 from content_spider.baseSpider import baseUpdateSpider
+from content_spider.baseSpider import baseUpdateBookStatusSpider
 import json
 import time
 
@@ -51,3 +52,36 @@ class BookupdateSpider(baseUpdateSpider):
             'content': result['data']['content'],
             'size': len(result['data']['content'])
         }
+    
+
+class BookupdateStatusSpider(baseUpdateBookStatusSpider):
+    """Status-update spider for the 'kanshu' source (source id 19)."""
+    name = 'kanshuupdatestatus'
+    allowed_domains = ['hezuo.lunjian.com']
+    source = 'kanshu'
+    source_name = '看书'
+    source_id = 19
+    base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
+
+    # Per-spider settings; the log file name carries the spider name and the
+    # local date at class-definition time.
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
+        'SOURCE': source,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log',
+    }
+
+    def get_book_info_url(self, bid):
+        """Return the BookDetail endpoint URL for the given source book id."""
+        endpoint = self.base_url.format('BookDetail')
+        query = '&bookid={}'.format(bid)
+        return endpoint + query
+
+    def book_info_result(self, response):
+        """Map the JSON ``data`` object of the response to a book-info dict.
+
+        Returns ``None`` when the decoded body is ``None`` or has no ``data``.
+        """
+        payload = json.loads(response.text)
+        if payload is None:
+            return None
+        data = payload.get('data')
+        if data is None:
+            return None
+
+        return {
+            'bid': data['id'],
+            'name': data['bookTitle'],
+            'author': data['author'],
+            'intro': data['introduction'],
+            'cover': data['cover'],
+            'keyword': data['labels'],
+            'status': data['state'],
+            'category': data['category'],
+            'category_id': 1,
+            'channel': data['channelId'],
+        }
+