zz 3 years ago
parent
commit
daa900b4fc

+ 4 - 0
ydyspider/.gitignore

@@ -0,0 +1,4 @@
+.idea/
+*.pyc
+*/__pycache__
+*/*.pyc
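
Note: in gitignore syntax a pattern without a slash (like *.pyc) already matches at any depth, so */__pycache__ and */*.pyc only cover one directory level that broader forms would handle anyway. An equivalent depth-independent set, if that was the intent:

.idea/
*.pyc
__pycache__/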

+ 8 - 0
ydyspider/mysql.py

@@ -81,3 +81,11 @@ class msyqlHelper(object):
 			result = cursor.fetchone()
 		self.conn.commit()
 		return result
+
+	def getLianshang(self):
+		sql = "SELECT zw_id FROM books a JOIN book_configs b ON a.id = b.bid WHERE b.cp_source = 'lianshang' ORDER BY zw_id DESC LIMIT 1"
+		with self.conn.cursor() as cursor:
+			cursor.execute(sql)
+			result = cursor.fetchone()
+		self.conn.commit()
+		return result
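
If more cp_source values arrive later, a parameterized variant of this query avoids one method per source and keeps the value out of the SQL string. A sketch only, assuming the same pymysql-style connection used elsewhere in msyqlHelper; getLastSourceBook is a hypothetical name:

	def getLastSourceBook(self, cp_source):
		# Same query as getLianshang, but the source is a bind parameter.
		sql = ("SELECT zw_id FROM books a JOIN book_configs b ON a.id = b.bid "
			"WHERE b.cp_source = %s ORDER BY zw_id DESC LIMIT 1")
		with self.conn.cursor() as cursor:
			cursor.execute(sql, (cp_source,))
			result = cursor.fetchone()
		self.conn.commit()
		return result

getLastSourceBook('lianshang') would then return the same row as getLianshang().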

+ 20 - 0
ydyspider/pipelines.py

@@ -4,8 +4,28 @@
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+import os
 
 
 class YdyspiderPipeline(object):
+
+    def __init__(self, stats):
+        self.__stats = stats
+
     def process_item(self, item, spider):
         return item
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        stats = crawler.stats
+        return cls(
+            stats=stats
+        )
+
+    def close_spider(self, spider):
+        bid_list = self.__stats.get_value('bid_list')
+        if bid_list is not None:
+            for bid in bid_list:
+                command = '/usr/local/php/bin/php /home/www/zhuishuyun_wap/artisan book:afs %s ' % bid
+                os.system(command)
+
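
os.system works here because bid comes straight from the spider's own insert, but subprocess keeps the interpreter, script path, and argument as separate tokens and skips the shell entirely. A sketch of the same hook (inside YdyspiderPipeline), not code from this commit:

    def close_spider(self, spider):
        import subprocess  # or at module level, next to the existing os import
        # Run the artisan command once per collected bid, without a shell.
        for bid in self.__stats.get_value('bid_list') or []:
            subprocess.run(
                ['/usr/local/php/bin/php', '/home/www/zhuishuyun_wap/artisan',
                 'book:afs', str(bid)],
                check=False,  # mirror os.system: a non-zero exit is not raised
            )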

+ 3 - 3
ydyspider/settings.py

@@ -64,9 +64,9 @@ DEFAULT_REQUEST_HEADERS = {
 
 # Configure item pipelines
 # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
-#ITEM_PIPELINES = {
-#    'ydyspider.pipelines.YdyspiderPipeline': 300,
-#}
+ITEM_PIPELINES = {
+    'ydyspider.pipelines.YdyspiderPipeline': 300,
+}
 
 # Enable and configure the AutoThrottle extension (disabled by default)
 # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
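
The integer is the pipeline's order: Scrapy runs enabled pipelines from the lowest value to the highest within 0-1000, so 300 leaves room on both sides. A purely hypothetical illustration of a second entry:

ITEM_PIPELINES = {
    'ydyspider.pipelines.SomeValidationPipeline': 100,  # hypothetical; would run first
    'ydyspider.pipelines.YdyspiderPipeline': 300,
}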

+ 3 - 0
ydyspider/spiders/ydy.py

@@ -32,6 +32,9 @@ class YunduyunSpider(scrapy.Spider):
 		data['status'] = res['data']['book_state']
 		data['sequence'] = response.meta['i']
 		bid = mysql.insertbook(data)
+		bid_list = self.crawler.stats.get_value('bid_list', [])
+		bid_list.append(bid)
+		self.crawler.stats.set_value('bid_list', bid_list)
 		mysql.close()
 		self.logger.info(data)
 		yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=chapterlist&bid=%s&token=sefaf23h7face" % res['data']['book_id'],meta={"bid":bid,"book_id":res['data']['book_id']},callback=self.parse3)

+ 4 - 2
ydyspider/spiders/zwcontentSpider.py

@@ -6,10 +6,11 @@ import json
 
 class zwcontentSpider(scrapy.Spider):
     name = 'zwcontent'
-    allowed_domains = ['zwapi.ycsd.cn']
-    base_url = 'http://zwapi.ycsd.cn/api/book'
+    allowed_domains = ['cp.yqsd.cn']
+    base_url = 'http://cp.yqsd.cn/api/book'
 
     def start_requests(self):
+        self.crawler.stats.set_value('bid_list', [])
         param = self.bid
         bid_list = param.split(',')
         for bid in bid_list:
@@ -32,6 +33,7 @@ class zwcontentSpider(scrapy.Spider):
         data['status'] = res['data']['status']
         data['sequence'] = response.meta['i']
         bid = mysql.insertZwBook(data)
+        self.crawler.stats.get_value('bid_list').append(bid)
         mysql.close()
         url = self.base_url + '/chapterlist/{}'.format(res['data']['bid'])
         yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['bid']}, callback=self.parse3)
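
Since param = self.bid reads a spider argument, a crawl is presumably launched like this (the ids are placeholders):

scrapy crawl zwcontent -a bid=101,102,103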

+ 33 - 0
ydyspider/spiders/zwcontentlianshangSpider.py

@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+import time
+
+import scrapy
+from ydyspider.mysql import msyqlHelper
+import json
+from . import zwcontentSpider
+
+
+class zwcontentlianshangSpider(zwcontentSpider.zwcontentSpider):
+    name = 'zwcontentlianshang'
+    allowed_domains = ['cp.yqsd.cn']
+    base_url = 'http://cp.yqsd.cn/api/book'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
+        'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
+
+    def start_requests(self):
+        self.crawler.stats.set_value('bid_list', [])
+        mysql = msyqlHelper()
+        last_book = mysql.getLianshang()
+        bid = last_book['zw_id']
+        start_url = self.base_url + '/booklist/lianshang/{}'.format(bid)
+        yield scrapy.Request(start_url, callback=self.parse_book_list)
+
+    def parse_book_list(self, response):
+        result = json.loads(response.text)
+        if result.get('data') is not None:
+            for item in result['data'][:1000]:
+                bid = item['id']
+                url = self.base_url + '/bookInfo/{}'.format(bid)
+                yield scrapy.Request(url, callback=self.parse2, meta={"zw_id": bid, "i": 0})
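
One caveat: getLianshang() returns None while no 'lianshang' book exists yet, so last_book['zw_id'] raises a TypeError on a first run, and the mysql helper is never closed in start_requests. A guarded sketch, assuming 0 is a valid starting cursor for the booklist endpoint (an assumption, not verified against the API):

    def start_requests(self):
        self.crawler.stats.set_value('bid_list', [])
        mysql = msyqlHelper()
        last_book = mysql.getLianshang()
        mysql.close()
        # Fall back to 0 on an empty table (assumed acceptable upstream).
        bid = last_book['zw_id'] if last_book else 0
        start_url = self.base_url + '/booklist/lianshang/{}'.format(bid)
        yield scrapy.Request(start_url, callback=self.parse_book_list)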