zz před 4 lety
rodič
revize
9e37e76867
2 změněné soubory, 70 přidání a 1 odebrání
  1. 1 1
      ydyspider/spiders/7lou.py
  2. 69 0
      ydyspider/spiders/zwcontentSpider.py

+ 1 - 1
ydyspider/spiders/7lou.py

@@ -9,7 +9,7 @@ class A7louSpider(scrapy.Spider):
 	base_url = 'http://zwapi.ycsd.cn/api/book'
 
 	def start_requests(self):
-		url = self.base_url + '/booklist'
+		url = self.base_url + '/booklist?cp=zw-7lou'
 		yield scrapy.Request(url, callback=self.parse)
 
 	def parse(self, response):

+ 69 - 0
ydyspider/spiders/zwcontentSpider.py

@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from ydyspider.mysql import msyqlHelper
+import json
+
+
+class zwcontentSpider(scrapy.Spider):
+    name = 'zwcontent'
+    allowed_domains = ['zwapi.ycsd.cn']
+    base_url = 'http://zwapi.ycsd.cn/api/book'
+
+    def start_requests(self):
+        param = self.bid
+        bid_list = param.split(',')
+        for bid in bid_list:
+            url = self.base_url + '/bookInfo/{}'.format(bid)
+            yield scrapy.Request(url, callback=self.parse2, meta={"zw_id": bid, "i": 0})
+
+    def parse2(self, response):
+        mysql = msyqlHelper()
+        res = response.text
+        res = self.json_encode(res)
+        data = dict()
+        data['zw_bid'] = res['data']['bid']
+        data['name'] = res['data']['name']
+        data['author'] = res['data']['author']
+        data['intro'] = res['data']['intro']
+        data['cover'] = res['data']['cover']
+        data['category_name'] = res['data']['category_name']
+        data['category_id'] = 0
+        data['status'] = res['data']['status']
+        data['sequence'] = response.meta['i']
+        bid = mysql.insertZwBook(data)
+        mysql.close()
+        url = self.base_url + '/chapterlist/{}'.format(res['data']['bid'])
+        yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['bid']}, callback=self.parse3)
+
+    def parse3(self, response):
+        res = response.text
+        res = self.json_encode(res)
+        if res['code'] == 0:
+            for chapter in res['data']:
+                chapter['bid'] = response.meta['bid']
+                chapter['source_chapter_id'] = chapter['chapter_id']
+                url = self.base_url + '/chapterContent/{}/{}'.format(response.meta['book_id'], chapter['chapter_id'])
+                yield scrapy.Request(url, meta=chapter, callback=self.parse4)
+
+    def parse4(self, response):
+        res = response.text
+        res = self.json_encode(res)
+        if res['code'] == 0:
+            mysql = msyqlHelper()
+            meta = response.meta
+            data = dict()
+            data['bid'] = meta['bid']
+            data['name'] = res['data']['chapter_name']
+            data['sequence'] = meta['sequence']
+            data['size'] = meta['size']
+            data['is_vip'] = meta['is_vip']
+            data['prev_cid'] = 0
+            data['next_cid'] = 0
+            data['recent_update_at'] = meta['update_at']
+            data['content'] = res['data']['content']
+            data['ly_chapter_id'] = meta['source_chapter_id']
+            mysql.inseraAll(data)
+            mysql.close()
+
+    def json_encode(self, jsonstr):
+        return json.loads(jsonstr)