12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
- # -*- coding: utf-8 -*-
- import scrapy
- from ydyspider.mysql import msyqlHelper
- import json
- import time
class YcsdSpider(scrapy.Spider):
    """Spider that copies book metadata and chapter text from cp.yqsd.cn.

    Request chain, one per book id:

    1. ``start_requests`` issues ``method=bookinfo``    -> ``parse2``
    2. ``parse2``         issues ``method=chapterList`` -> ``parse3``
    3. ``parse3``         issues ``method=chapter``     -> ``parse4``

    The ids to crawl are read from the ``bid`` attribute, a comma-separated
    string (presumably passed as a spider argument such as
    ``-a bid=1,2,3`` -- confirm against how the spider is launched).
    """

    name = 'ycsd'
    allowed_domains = ['cp.yqsd.cn']
    base_url = 'http://cp.yqsd.cn/cp/booksource/?mcp=zhuishuyun&'

    def start_requests(self):
        """Yield one ``bookinfo`` request per id listed in ``self.bid``.

        Surrounding whitespace is stripped from each id and empty entries
        (e.g. from a trailing comma) are skipped, so they never end up in a
        request URL.  If ``bid`` is missing or empty an error is logged and
        no request is produced.
        """
        raw_ids = getattr(self, 'bid', None)
        if not raw_ids:
            self.logger.error('no book ids supplied: "bid" must be a comma-separated id list')
            return
        for book_id in (part.strip() for part in raw_ids.split(',')):
            if not book_id:
                continue
            url = self.base_url + 'method=bookinfo&bid={}'.format(book_id)
            # NOTE(review): "i" is always 0, so every book is stored with
            # sequence 0 in parse2 -- confirm this is intended.
            yield scrapy.Request(url, callback=self.parse2, meta={"ly_bid": book_id, "i": 0})

    def parse2(self, response):
        """Store the book described by a ``bookinfo`` reply, then request its chapters.

        The row is handed to ``msyqlHelper.insertbookV2``; whatever that call
        returns is forwarded to ``parse3`` as ``meta['bid']`` (presumably the
        local book id -- verify against the helper).  Replies that carry no
        usable ``data`` object are logged and skipped.
        """
        res = self.json_encode(response.text)
        book = res.get('data') if isinstance(res, dict) else None
        if not book or not isinstance(book, dict):
            self.logger.warning('bookinfo reply without book data, skipped: %s', response.url)
            return

        # Key order is kept as in the original in case the helper depends on it.
        data = {
            'ly_bid': book['book_id'],
            'source_name': 'ycsd',
            'name': book['book_name'],
            'author': book['book_author'],
            'intro': book['introduction'],
            'cover': book['cover_url'],
            'category_name': book['book_category_name'],
            'category_id': book['book_category_id'],
            'status': book['book_state'],
            'sequence': response.meta['i'],
        }

        # Open the helper only once the row is complete, and always close it,
        # so a failing insert cannot leak the connection.
        mysql = msyqlHelper()
        try:
            bid = mysql.insertbookV2(data)
        finally:
            mysql.close()

        url = self.base_url + 'method=chapterList&bid={}'.format(book['book_id'])
        yield scrapy.Request(url, meta={"bid": bid, "book_id": book['book_id']}, callback=self.parse3)

    def parse3(self, response):
        """Yield one ``chapter`` request for every entry of a ``chapterList`` reply.

        Each chapter dict from the reply is tagged with the ``bid`` received
        from ``parse2`` and passed along as the request meta, so ``parse4``
        can read the chapter's list-level fields.  Replies whose ``code`` is
        not 200 are logged and skipped.
        """
        res = self.json_encode(response.text)
        if res['code'] != 200:
            self.logger.warning('chapterList reply with code %s, skipped: %s', res['code'], response.url)
            return

        # Both values are the same for every chapter; read them once.
        bid = response.meta['bid']
        source_book_id = response.meta['book_id']
        for chapter in res['data']:
            chapter['bid'] = bid
            url = self.base_url + 'method=chapter&bid={}&cid={}'.format(source_book_id, chapter['chapter_id'])
            yield scrapy.Request(url, meta=chapter, callback=self.parse4)

    def parse4(self, response):
        """Store one chapter taken from a ``chapter`` reply.

        Combines the list-level fields carried in ``response.meta`` with the
        content of the reply and hands the row to ``msyqlHelper.inseraAll``.
        Replies whose ``code`` is not 200 are logged and skipped.
        """
        res = self.json_encode(response.text)
        if res['code'] != 200:
            self.logger.warning('chapter reply with code %s, skipped: %s', res['code'], response.url)
            return

        meta = response.meta
        chapter = res['data']
        content = chapter['chapter_content']

        # Key order is kept as in the original in case the helper depends on it.
        data = {
            'bid': meta['bid'],
            'name': meta['chapter_name'],
            # Stored sequence is the source order number plus one
            # (presumably converting a 0-based index -- confirm).
            'sequence': meta['chapter_order_number'] + 1,
            'size': len(content),
            'is_vip': meta['chapter_need_pay'],
            'prev_cid': 0,
            'next_cid': 0,
            # The timestamp is rendered in the local time zone of the host.
            'recent_update_at': time.strftime(
                "%Y-%m-%d %H:%M:%S",
                time.localtime(meta['chapter_last_update_time']),
            ),
            'content': content,
            'ly_chapter_id': chapter['chapter_id'],
        }

        # Always release the connection, even when the insert raises.
        mysql = msyqlHelper()
        try:
            mysql.inseraAll(data)
        finally:
            mysql.close()

    def json_encode(self, jsonstr):
        """Parse the JSON text ``jsonstr`` and return the resulting object.

        Despite its name this method decodes; the name is kept so existing
        callers continue to work.
        """
        return json.loads(jsonstr)
|