浏览代码

update status

zhaoyang 2 年之前
父节点
当前提交
ea858534a7
共有 1 个文件被更改，包括 14 次插入和 6 次删除
  1. 14 6
      content_spider/baseSpider.py

+ 14 - 6
content_spider/baseSpider.py

@@ -492,12 +492,20 @@ class baseUpdateBookStatusSpider(scrapy.Spider):
         return cls(host=host, user=user, password=password, db=db, bid_list=bid_list, stats=crawler.stats)
         return cls(host=host, user=user, password=password, db=db, bid_list=bid_list, stats=crawler.stats)
 
 
     def start_requests(self):
     def start_requests(self):
-        book_list = self.mysqlHelper.get_need_update_book_list()
-        if book_list is not None:
-            for book in book_list:
-                url = self.get_book_info_url(book['cp_bid'])
-                meta = {'bid': book['id'], 'cp_bid': book['cp_bid']}
-                yield scrapy.Request(url, callback=self.parse_chapter_list, meta=meta)
+        if len(self.bid_list) >0:
+            for bid in self.bid_list:
+                book = self.mysqlHelper.get_book_info_by_id(bid)
+                if book is not None:
+                    url = self.get_book_info_url(book['cp_bid'])
+                    meta = {'bid': book['id'], 'cp_bid': book['cp_bid']}
+                    yield scrapy.Request(url, callback=self.parse_book_info, meta=meta)
+        else:
+            book_list = self.mysqlHelper.get_need_update_book_list()
+            if book_list is not None:
+                for book in book_list:
+                    url = self.get_book_info_url(book['cp_bid'])
+                    meta = {'bid': book['id'], 'cp_bid': book['cp_bid']}
+                    yield scrapy.Request(url, callback=self.parse_book_info, meta=meta)
 
 
     def parse_book_info(self, response):
     def parse_book_info(self, response):
         if response.text == '':
         if response.text == '':