Ver código fonte

zhiyu command

zhaoyang 2 anos atrás
pai
commit
e79048a27f

+ 2 - 1
ydyspider/.gitignore

@@ -2,4 +2,5 @@
 *.pyc
 */__pycache__
 */*.pyc
-.vscode/
+.vscode/
+ydyspider/log/

+ 5 - 0
ydyspider/bash/command.sh

@@ -0,0 +1,5 @@
+#!/bin/bash
+pwd=/home/www/ydyspider
+cd $pwd
+spider_name=$1
+/usr/local/python2.7.14/bin/scrapy crawl $spider_name

+ 8 - 0
ydyspider/mysql.py

@@ -99,3 +99,11 @@ class msyqlHelper(object):
 			result = cursor.fetchone()
 		self.conn.commit()
 		return result
+	
+	def getZyBook(self,zhiyu_book_id):
+		sql = "select id from books where zhiyu_book_id = {}".format(zhiyu_book_id)
+		with self.conn.cursor() as cursor:
+			cursor.execute(sql)
+			result = cursor.fetchone()
+		self.conn.commit()
+		return result

+ 25 - 4
ydyspider/spiders/zycontentSpider.py

@@ -2,6 +2,7 @@
 import scrapy
 from ydyspider.mysql import msyqlHelper
 import json
+import time
 
 
 class zycontentSpider(scrapy.Spider):
@@ -9,14 +10,34 @@ class zycontentSpider(scrapy.Spider):
     allowed_domains = ['60.204.150.173']
     query = '?channel_name=zhuishuyun&channel_key=123456'
     base_url = 'http://60.204.150.173:8094/api/output'
+    custom_settings = {
+        'DOWNLOAD_DELAY': 0.01,
+        'LOG_FILE': 'ydyspider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
+    }
 
     def start_requests(self):
         self.crawler.stats.set_value('bid_list', [])
         param = self.bid
-        bid_list = param.split(',')
-        for bid in bid_list:
-            url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
-            yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})
+        if param is not None:
+            bid_list = param.split(',')
+            for bid in bid_list:
+                url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
+                yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})
+        else:
+            url = self.base_url + '/booklist' + self.query
+            yield scrapy.Request(url, callback=self.parse1, meta={"zhiyu_book_id": bid, "i": 0})
+
+    def parse1(self,response):
+        res = response.text
+        res = self.json_encode(res)
+        book_list = res.get('data')
+        mysql = msyqlHelper()
+        for book_item in book_list:
+            bid = book_item['bid']
+            zhiyu_book = mysql.getZyBook(bid)
+            if zhiyu_book is not None:
+                url = self.base_url + '/bookdetail/{}'.format(bid) + self.query
+                yield scrapy.Request(url, callback=self.parse2, meta={"zhiyu_book_id": bid, "i": 0})
 
     def parse2(self, response):
         mysql = msyqlHelper()