@@ -2,6 +2,7 @@
 import scrapy
 from ydyspider.mysql import msyqlHelper
 import json
+import time


 class zycontentSpider(scrapy.Spider):
@@ -9,14 +10,38 @@ class zycontentSpider(scrapy.Spider):
     allowed_domains = ['60.204.150.173']
     query = '?channel_name=zhuishuyun&channel_key=123456'
     base_url = 'http://60.204.150.173:8094/api/output'
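+    # per-spider settings: a small download delay and a date-stamped log file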
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
+        'LOG_FILE': 'ydyspider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }

     def start_requests(self):
         self.crawler.stats.set_value('bid_list', [])
         param = self.bid
-        bid_list = param.split(',')
-        for bid in bid_list:
-            url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
-            yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})
+        if param is not None:
+            bid_list = param.split(',')
+            for bid in bid_list:
+                url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
+                yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})
+        else:
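+            # no bid argument was passed in, so fall back to the full book list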
+            url = self.base_url + '/booklist' + self.query
+            yield scrapy.Request(url, callback=self.parse1, meta={"i": 0})
+
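+    # parse the /booklist response and queue detail requests for known books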
+    def parse1(self, response):
+        res = response.text
+        res = self.json_encode(res)
+        book_list = res.get('data')
+        mysql = msyqlHelper()
+        for book_item in book_list:
+            bid = book_item['bid']
+            zhiyu_book = mysql.getZyBook(bid)
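+            # only follow books that getZyBook already has a record for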
+            if zhiyu_book is not None:
+                url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
+                yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})

     def parse2(self, response):
         mysql = msyqlHelper()