|
@@ -2,76 +2,72 @@
|
|
|
import scrapy
|
|
|
from ydyspider.mysql import msyqlHelper
|
|
|
import json
|
|
|
-import time
|
|
|
|
|
|
-class YunduyunSpider(scrapy.Spider):
|
|
|
- name = 'yunduyun'
|
|
|
- allowed_domains = ['leyuee.com']
|
|
|
- start_urls = ['http://www.leyuee.com/services/zwfx.aspx?method=booklist&token=sefaf23h7face']
|
|
|
+class A7louSpider(scrapy.Spider):
|
|
|
+ name = '7lou'
|
|
|
+ allowed_domains = ['zwapi.ycsd.cn']
|
|
|
+ base_url = ''
|
|
|
+
|
|
|
+ def start_requests(self):
|
|
|
+ url = self.base_url + '/booklist'
|
|
|
+ yield scrapy.Request(url, callback=self.parse)
|
|
|
+
|
|
|
def parse(self, response):
|
|
|
- res = response.text
|
|
|
- res = self.json_encode(res)
|
|
|
- self.logger.info(res);
|
|
|
- i = 0
|
|
|
+        result = self.json_encode(response.text)
|
|
|
mysql = msyqlHelper()
|
|
|
- #id = 3
|
|
|
- #yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=bookinfo&token=sefaf23h7face&bid=%s" % id,callback=self.parse2,meta={"ly_bid":id,"i":i})
|
|
|
-
|
|
|
- for item in res['data']:
|
|
|
- if item['book_id'] <=1501:
|
|
|
- continue
|
|
|
- exist = mysql.selectbylyid(item['book_id'])
|
|
|
+        for i, item in enumerate(result['data']):
|
|
|
+ exist = mysql.getByZwSource(item['bid'])
|
|
|
if exist is not None:
|
|
|
- self.logger.info(exist)
|
|
|
continue
|
|
|
- yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=bookinfo&token=sefaf23h7face&bid=%s" % item['book_id'],callback=self.parse2,meta={"ly_bid":item['book_id'],"i":i})
|
|
|
+ url = self.base_url + '/bookInfo/{}'.format(item['bid'])
|
|
|
+ yield scrapy.Request(url, callback=self.parse2, meta={"zw_id": item['bid'], "i":i})
|
|
|
|
|
|
- def parse2(self,response):
|
|
|
+ def parse2(self, response):
|
|
|
mysql = msyqlHelper()
|
|
|
res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
data = dict()
|
|
|
- data['ly_bid'] = res['data']['book_id']
|
|
|
- data['name'] = res['data']['book_name']
|
|
|
- data['author'] = res['data']['book_author']
|
|
|
- data['intro'] = res['data']['introduction']
|
|
|
- data['cover'] = res['data']['cover_url']
|
|
|
- data['category_name'] = res['data']['book_tags']
|
|
|
- data['category_id'] = res['data']['book_category_id']
|
|
|
- data['status'] = res['data']['book_state']
|
|
|
- data['sequence'] = response.meta['i']
|
|
|
- bid = mysql.insertbook(data)
|
|
|
- mysql.close()
|
|
|
- self.logger.info(data)
|
|
|
- yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=chapterlist&bid=%s&token=sefaf23h7face" % res['data']['book_id'],meta={"bid":bid,"book_id":res['data']['book_id']},callback=self.parse3)
|
|
|
+ data['zw_bid'] = res['data']['bid']
|
|
|
+ data['name'] = res['data']['name']
|
|
|
+ data['author'] = res['data']['author']
|
|
|
+ data['intro'] = res['data']['intro']
|
|
|
+ data['cover'] = res['data']['cover']
|
|
|
+ data['category_name'] = res['data']['category_name']
|
|
|
+ data['category_id'] = 0
|
|
|
+ data['status'] = res['data']['status']
|
|
|
+ data['sequence'] = response.meta['i']
|
|
|
+ bid = mysql.insertZwBook(data)
|
|
|
+ mysql.close()
|
|
|
+ url = self.base_url + '/chapterlist/{}'.format(res['data']['bid'])
|
|
|
+        yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['bid']}, callback=self.parse3)
|
|
|
|
|
|
- def parse3(self,response):
|
|
|
- res = response.text;
|
|
|
+ def parse3(self, response):
|
|
|
+ res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
- if res['code'] == 200:
|
|
|
- #mysql = msyqlHelper()
|
|
|
- for volume in res['data']:
|
|
|
- for chapter in volume['chapters']:
|
|
|
- chapter['bid'] = response.meta['bid']
|
|
|
- yield scrapy.Request('http://www.leyuee.com/services/zwfx.aspx?method=chapter&bid=%s&cid=%s&token=sefaf23h7face' % (response.meta['book_id'],chapter['chapter_id']),meta=chapter,callback=self.parse4)
|
|
|
-
|
|
|
- def parse4(self,response):
|
|
|
- res = response.text;
|
|
|
+ if res['code'] == 0:
|
|
|
+ for chapter in res['data']:
|
|
|
+ chapter['bid'] = response.meta['bid']
|
|
|
+ chapter['source_chapter_id'] = chapter['chapter_id']
|
|
|
+ url = self.base_url + '/chapterContent/{}/{}'.format(response.meta['book_id'], chapter['chapter_id'])
|
|
|
+ yield scrapy.Request(url, meta=chapter, callback=self.parse4)
|
|
|
+
|
|
|
+ def parse4(self, response):
|
|
|
+ res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
if res['code'] == 200:
|
|
|
mysql = msyqlHelper()
|
|
|
meta = response.meta
|
|
|
data = dict()
|
|
|
data['bid'] = meta['bid']
|
|
|
- data['name'] = meta['chapter_name']
|
|
|
- data['sequence'] = meta['chapter_order_number']+1
|
|
|
- data['size'] = len(res['data']['chapter_content'])
|
|
|
- data['is_vip'] = meta['chapter_need_pay']
|
|
|
+ data['name'] = meta['name']
|
|
|
+ data['sequence'] = meta['sequence']
|
|
|
+ data['size'] = meta['size']
|
|
|
+ data['is_vip'] = meta['is_vip']
|
|
|
data['prev_cid'] = 0
|
|
|
data['next_cid'] = 0
|
|
|
- data['recent_update_at'] = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(meta['chapter_last_update_time']))
|
|
|
+ data['recent_update_at'] = meta['update_at']
|
|
|
data['content'] = res['data']['chapter_content']
|
|
|
- data['ly_chapter_id'] = res['data']['chapter_id']
|
|
|
+ data['ly_chapter_id'] = meta['source_chapter_id']
|
|
|
mysql.inseraAll(data)
|
|
|
mysql.close()
|
|
|
|