@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+import time
+
+import scrapy
+from ydyspider.mysql import msyqlHelper
+import json
+from . import zwcontentSpider
+
+
+class zwcontentlianshangSpider(zwcontentSpider.zwcontentSpider):
+    name = 'zwcontentlianshang'
+    allowed_domains = ['cp.yqsd.cn']
+    base_url = 'http://cp.yqsd.cn/api/book'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
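+        # Write logs to a per-day file named after the spider.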
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+    def start_requests(self):
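+        # Initialise a stat that collects the ids of books handled in this run.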
+        self.crawler.stats.set_value('bid_list', [])
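+        # Start from the last lianshang book already recorded in MySQL.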
+        mysql = msyqlHelper()
+        last_book = mysql.getLianshang()
+        bid = last_book['zw_id']
+        start_url = self.base_url + '/booklist/lianshang/{}'.format(bid)
+        yield scrapy.Request(start_url, callback=self.parse_book_list)
+
+    def parse_book_list(self, response):
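+        # The listing endpoint returns JSON with a 'data' array of books.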
+        result = json.loads(response.text)
+        if result.get('data') is not None:
+            # Take at most the first 1000 books from the listing.
+            for item in result['data'][:1000]:
+                bid = item['id']
+                url = self.base_url + '/bookInfo/{}'.format(bid)
+                # parse2 is inherited from the parent zwcontentSpider.
+                yield scrapy.Request(url, callback=self.parse2, meta={"zw_id": bid, "i": 0})