|
@@ -2,76 +2,72 @@
|
|
|
import scrapy
|
|
|
from ydyspider.mysql import msyqlHelper
|
|
|
import json
|
|
|
-import time
|
|
|
|
|
|
-class YunduyunSpider(scrapy.Spider):
|
|
|
- name = 'yunduyun'
|
|
|
- allowed_domains = ['leyuee.com']
|
|
|
- start_urls = ['http://www.leyuee.com/services/zwfx.aspx?method=booklist&token=sefaf23h7face']
|
|
|
+class A7louSpider(scrapy.Spider):
|
|
|
+ name = '7lou'
|
|
|
+ allowed_domains = ['zwapi.ycsd.cn']
|
|
|
+ base_url = ''
|
|
|
+
|
|
|
+ def start_requests(self):
|
|
|
+ url = self.base_url + '/booklist'
|
|
|
+ yield scrapy.Request(url, callback=self.parse)
|
|
|
+
|
|
|
def parse(self, response):
|
|
|
- res = response.text
|
|
|
- res = self.json_encode(res)
|
|
|
- self.logger.info(res);
|
|
|
- i = 0
|
|
|
+        result = self.json_encode(response.text)
|
|
|
mysql = msyqlHelper()
|
|
|
- #id = 3
|
|
|
- #yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=bookinfo&token=sefaf23h7face&bid=%s" % id,callback=self.parse2,meta={"ly_bid":id,"i":i})
|
|
|
-
|
|
|
- for item in res['data']:
|
|
|
- if item['book_id'] <=1501:
|
|
|
- continue
|
|
|
- exist = mysql.selectbylyid(item['book_id'])
|
|
|
+        for i, item in enumerate(result['data']):
|
|
|
+ exist = mysql.getByZwSource(item['bid'])
|
|
|
if exist is not None:
|
|
|
- self.logger.info(exist)
|
|
|
continue
|
|
|
- yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=bookinfo&token=sefaf23h7face&bid=%s" % item['book_id'],callback=self.parse2,meta={"ly_bid":item['book_id'],"i":i})
|
|
|
+ url = self.base_url + '/bookInfo/{}'.format(item['bid'])
|
|
|
+ yield scrapy.Request(url, callback=self.parse2, meta={"zw_id": item['bid'], "i":i})
|
|
|
|
|
|
- def parse2(self,response):
|
|
|
+ def parse2(self, response):
|
|
|
mysql = msyqlHelper()
|
|
|
res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
data = dict()
|
|
|
- data['ly_bid'] = res['data']['book_id']
|
|
|
- data['name'] = res['data']['book_name']
|
|
|
- data['author'] = res['data']['book_author']
|
|
|
- data['intro'] = res['data']['introduction']
|
|
|
- data['cover'] = res['data']['cover_url']
|
|
|
- data['category_name'] = res['data']['book_tags']
|
|
|
- data['category_id'] = res['data']['book_category_id']
|
|
|
- data['status'] = res['data']['book_state']
|
|
|
- data['sequence'] = response.meta['i']
|
|
|
- bid = mysql.insertbook(data)
|
|
|
- mysql.close()
|
|
|
- self.logger.info(data)
|
|
|
- yield scrapy.Request("http://www.leyuee.com/services/zwfx.aspx?method=chapterlist&bid=%s&token=sefaf23h7face" % res['data']['book_id'],meta={"bid":bid,"book_id":res['data']['book_id']},callback=self.parse3)
|
|
|
+ data['zw_bid'] = res['data']['bid']
|
|
|
+ data['name'] = res['data']['name']
|
|
|
+ data['author'] = res['data']['author']
|
|
|
+ data['intro'] = res['data']['intro']
|
|
|
+ data['cover'] = res['data']['cover']
|
|
|
+ data['category_name'] = res['data']['category_name']
|
|
|
+ data['category_id'] = 0
|
|
|
+ data['status'] = res['data']['status']
|
|
|
+ data['sequence'] = response.meta['i']
|
|
|
+ bid = mysql.insertZwBook(data)
|
|
|
+ mysql.close()
|
|
|
+ url = self.base_url + '/chapterlist/{}'.format(res['data']['bid'])
|
|
|
+        yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['bid']}, callback=self.parse3)
|
|
|
|
|
|
- def parse3(self,response):
|
|
|
- res = response.text;
|
|
|
+ def parse3(self, response):
|
|
|
+ res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
- if res['code'] == 200:
|
|
|
- #mysql = msyqlHelper()
|
|
|
- for volume in res['data']:
|
|
|
- for chapter in volume['chapters']:
|
|
|
- chapter['bid'] = response.meta['bid']
|
|
|
- yield scrapy.Request('http://www.leyuee.com/services/zwfx.aspx?method=chapter&bid=%s&cid=%s&token=sefaf23h7face' % (response.meta['book_id'],chapter['chapter_id']),meta=chapter,callback=self.parse4)
|
|
|
-
|
|
|
- def parse4(self,response):
|
|
|
- res = response.text;
|
|
|
+ if res['code'] == 0:
|
|
|
+ for chapter in res['data']:
|
|
|
+ chapter['bid'] = response.meta['bid']
|
|
|
+ chapter['source_chapter_id'] = chapter['chapter_id']
|
|
|
+ url = self.base_url + '/chapterContent/{}/{}'.format(response.meta['book_id'], chapter['chapter_id'])
|
|
|
+ yield scrapy.Request(url, meta=chapter, callback=self.parse4)
|
|
|
+
|
|
|
+ def parse4(self, response):
|
|
|
+ res = response.text
|
|
|
res = self.json_encode(res)
|
|
|
if res['code'] == 200:
|
|
|
mysql = msyqlHelper()
|
|
|
meta = response.meta
|
|
|
data = dict()
|
|
|
data['bid'] = meta['bid']
|
|
|
- data['name'] = meta['chapter_name']
|
|
|
- data['sequence'] = meta['chapter_order_number']+1
|
|
|
- data['size'] = len(res['data']['chapter_content'])
|
|
|
- data['is_vip'] = meta['chapter_need_pay']
|
|
|
+ data['name'] = meta['name']
|
|
|
+ data['sequence'] = meta['sequence']
|
|
|
+ data['size'] = meta['size']
|
|
|
+ data['is_vip'] = meta['is_vip']
|
|
|
data['prev_cid'] = 0
|
|
|
data['next_cid'] = 0
|
|
|
- data['recent_update_at'] = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(meta['chapter_last_update_time']))
|
|
|
+ data['recent_update_at'] = meta['update_at']
|
|
|
data['content'] = res['data']['chapter_content']
|
|
|
- data['ly_chapter_id'] = res['data']['chapter_id']
|
|
|
+ data['ly_chapter_id'] = meta['source_chapter_id']
|
|
|
mysql.inseraAll(data)
|
|
|
mysql.close()
|
|
|
|