|
@@ -0,0 +1,73 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+import scrapy
|
|
|
+from ydyspider.mysql import msyqlHelper
|
|
|
+import json
|
|
|
+
|
|
|
+
|
|
|
class zycontentSpider(scrapy.Spider):
    """Copy books from the cp.yqsd.cn content API into a local MySQL store.

    Crawl chain: start_requests -> parse2 (book detail) -> parse3
    (chapter list) -> parse4 (chapter content).  The set of source book
    ids comes from the ``bid`` spider argument, a comma-separated string
    (e.g. ``scrapy crawl zycontent -a bid=1,2,3``).
    """

    name = 'zycontent'
    allowed_domains = ['cp.yqsd.cn']
    # Channel credentials appended as a query string to the book-detail and
    # chapter-list requests.  NOTE(review): parse3 builds the chaptercontent
    # URL *without* this suffix — confirm that endpoint needs no auth.
    query = '?channel_name=zhuishuyun&channel_key=123456'
    # NOTE(review): base_url already ends with '/' and every path segment
    # below also starts with '/', so requests go out as
    # '.../api/output//bookdetail/...'.  The server presumably tolerates the
    # double slash; verify before "fixing", since it changes every URL.
    base_url = 'http://cp.yqsd.cn/api/output/'

    def start_requests(self):
        """Seed one book-detail request per id in the ``bid`` argument."""
        # Shared accumulator for the local DB ids of inserted books;
        # parse2 appends to this list via the crawler stats collector.
        self.crawler.stats.set_value('bid_list', [])
        param = self.bid  # supplied via `-a bid=...` on the command line
        bid_list = param.split(',')
        for bid in bid_list:
            url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
            # NOTE(review): "i" is hard-coded to 0, so parse2 stores
            # sequence=0 for every book — confirm that is intended.
            yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})

    def parse2(self, response):
        """Insert the book-detail payload into MySQL, then request its chapter list."""
        mysql = msyqlHelper()
        res = response.text
        res = self.json_encode(res)  # despite the name, this *decodes* JSON
        data = dict()
        data['zhiyu_book_id'] = res['data']['bid']
        data['source_name'] = 'zy_content'
        data['name'] = res['data']['name']
        data['author'] = res['data']['author']
        data['intro'] = res['data']['intro']
        data['cover'] = res['data']['cover']
        data['category_name'] = res['data']['category_name']
        data['category_id'] = 0  # no category-id mapping here; stored as 0
        data['status'] = res['data']['status']
        data['sequence'] = response.meta['i']
        # insertZyBook returns the local DB id used as 'bid' downstream.
        bid = mysql.insertZyBook(data)
        # Append the new local id to the shared stats list created in
        # start_requests.
        self.crawler.stats.get_value('bid_list').append(bid)
        mysql.close()
        url = self.base_url + '/chapterlist/{}'.format(res['data']['bid']) + self.query
        # meta carries both ids: 'bid' = local DB id, 'book_id' = remote id.
        yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['bid']}, callback=self.parse3)

    def parse3(self, response):
        """Fan out one chapter-content request per chapter in the list."""
        res = response.text
        res = self.json_encode(res)
        if res['code'] == 0:  # API success code
            for chapter in res['data']:
                # Enrich the chapter dict and pass it whole as request meta,
                # so parse4 can read the chapter fields back from meta.
                chapter['bid'] = response.meta['bid']
                chapter['source_chapter_id'] = chapter['chapter_id']
                # NOTE(review): unlike the other requests, self.query is not
                # appended here — confirm the omission is deliberate.
                url = self.base_url + '/chaptercontent/{}/chapterid/{}'.format(response.meta['book_id'], chapter['chapter_id'])
                yield scrapy.Request(url, meta=chapter, callback=self.parse4)

    def parse4(self, response):
        """Persist one chapter's content row to MySQL."""
        res = response.text
        res = self.json_encode(res)
        if res['code'] == 0:
            mysql = msyqlHelper()
            meta = response.meta  # the chapter dict built in parse3
            data = dict()
            data['bid'] = meta['bid']
            data['name'] = res['data']['chapter_name']
            # assumes the chapterlist payload carries sequence / size /
            # is_vip / updated_at per chapter — TODO confirm against the API.
            data['sequence'] = meta['sequence']
            data['size'] = meta['size']
            data['is_vip'] = meta['is_vip']
            data['prev_cid'] = 0  # prev/next chapter links not resolved here
            data['next_cid'] = 0
            data['recent_update_at'] = meta['updated_at']
            data['content'] = res['data']['content']
            data['ly_chapter_id'] = meta['source_chapter_id']
            mysql.inseraAll(data)
            mysql.close()

    def json_encode(self, jsonstr):
        """Parse a JSON string into Python objects (thin json.loads wrapper).

        NOTE(review): the name says "encode" but this decodes; kept as-is
        for compatibility with existing callers.
        """
        return json.loads(jsonstr)
|