zz 4 år sedan
förälder
incheckning
e36e3ee7ef
2 ändrade filer med 84 tillägg och 0 borttagningar
  1. 11 0
      ydyspider/mysql.py
  2. 73 0
      ydyspider/spiders/ycsd.py

+ 11 - 0
ydyspider/mysql.py

@@ -16,6 +16,17 @@ class msyqlHelper(object):
 		self.conn.commit()
 		return id
 
+
+	def insertbookV2(self,data):
+		now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+		sql = "insert into books (ly_bid,name,author,intro,cover,category_name,category_id,status,sequence,chapter_count,first_cid,last_cid,size,last_chapter,`created_at`,`updated_at`,source_name) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
+		id = 0
+		with self.conn.cursor() as cursor:
+			res = cursor.execute(sql,(data['ly_bid'],data['name'],data['author'],data['intro'],data['cover'],data['category_name'],data['category_id'],data['status'],data['sequence'],'0','0','0','0','0',now,now,data['source_name']))
+			id = int(cursor.lastrowid)
+		self.conn.commit()
+		return id
+
 	def insertZwBook(self, data):
 		now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
 		sql = "insert into books (zw_id,name,author,intro,cover,category_name,category_id,status,sequence,chapter_count,first_cid,last_cid,size,last_chapter,`created_at`,`updated_at`) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

+ 73 - 0
ydyspider/spiders/ycsd.py

@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from ydyspider.mysql import msyqlHelper
+import json
+import time
+
+
+class YcsdSpider(scrapy.Spider):
+    name = 'ycsd'
+    allowed_domains = ['cp.yqsd.cn']
+    base_url = 'http://cp.yqsd.cn/cp/booksource/?mcp=zhuishuyun&'
+
+    def start_requests(self):
+        bid_t = self.bid
+        bid_list = bid_t.split(',')
+        for ids in bid_list:
+            url = self.base_url + 'bid={}'.format(ids)
+            yield scrapy.Request(url, callback=self.parse2, meta={"ly_bid": ids, "i": 0})
+
+    def parse2(self, response):
+        mysql = msyqlHelper()
+        res = response.text
+        res = self.json_encode(res)
+        data = dict()
+        data['ly_bid'] = res['data']['book_id']
+        data['source_name'] = 'ycsd'
+        data['name'] = res['data']['book_name']
+        data['author'] = res['data']['book_author']
+        data['intro'] = res['data']['introduction']
+        data['cover'] = res['data']['cover_url']
+        data['category_name'] = res['data']['book_category_name']
+        data['category_id'] = res['data']['book_category_id']
+        data['status'] = res['data']['book_state']
+        data['sequence'] = response.meta['i']
+        bid = mysql.insertbookV2(data)
+        mysql.close()
+        url = self.base_url + 'method=chapterList&bid={}'.format(res['data']['book_id'])
+        yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['book_id']}, callback=self.parse3)
+
+    def parse3(self, response):
+        res = response.text
+        res = self.json_encode(res)
+        if res['code'] == 200:
+            for chapter in res['data']:
+                chapter['bid'] = response.meta['bid']
+                bid = response.meta['book_id']
+                url = self.base_url + 'method=chapter&bid={}&cid={}'.format(bid, chapter['chapter_id'])
+                yield scrapy.Request(url, meta=chapter, callback=self.parse4)
+
+    def parse4(self, response):
+        res = response.text
+        res = self.json_encode(res)
+        if res['code'] == 200:
+            mysql = msyqlHelper()
+            meta = response.meta
+            data = dict()
+            data['bid'] = meta['bid']
+            data['name'] = meta['chapter_name']
+            data['sequence'] = meta['chapter_order_number'] + 1
+            data['size'] = len(res['data']['chapter_content'])
+            data['is_vip'] = meta['chapter_need_pay']
+            data['prev_cid'] = 0
+            data['next_cid'] = 0
+            data['recent_update_at'] = time.strftime("%Y-%m-%d %H:%M:%S",
+                                                     time.localtime(meta['chapter_last_update_time']))
+            data['content'] = res['data']['chapter_content']
+            data['ly_chapter_id'] = res['data']['chapter_id']
+            mysql.inseraAll(data)
+            mysql.close()
+
+    def json_encode(self, jsonstr):
+        return json.loads(jsonstr)
+