# ycsd.py (2.8 KB)
# -*- coding: utf-8 -*-
import json
import time
from contextlib import closing

import scrapy

from ydyspider.mysql import msyqlHelper
  6. class YcsdSpider(scrapy.Spider):
  7. name = 'ycsd'
  8. allowed_domains = ['cp.yqsd.cn']
  9. base_url = 'http://cp.yqsd.cn/cp/booksource/?mcp=zhuishuyun&'
  10. def start_requests(self):
  11. bid_t = self.bid
  12. bid_list = bid_t.split(',')
  13. for ids in bid_list:
  14. url = self.base_url + 'method=bookinfo&bid={}'.format(ids)
  15. yield scrapy.Request(url, callback=self.parse2, meta={"ly_bid": ids, "i": 0})
  16. def parse2(self, response):
  17. mysql = msyqlHelper()
  18. res = response.text
  19. res = self.json_encode(res)
  20. data = dict()
  21. data['ly_bid'] = res['data']['book_id']
  22. data['source_name'] = 'ycsd'
  23. data['name'] = res['data']['book_name']
  24. data['author'] = res['data']['book_author']
  25. data['intro'] = res['data']['introduction']
  26. data['cover'] = res['data']['cover_url']
  27. data['category_name'] = res['data']['book_category_name']
  28. data['category_id'] = res['data']['book_category_id']
  29. data['status'] = res['data']['book_state']
  30. data['sequence'] = response.meta['i']
  31. bid = mysql.insertbookV2(data)
  32. mysql.close()
  33. url = self.base_url + 'method=chapterList&bid={}'.format(res['data']['book_id'])
  34. yield scrapy.Request(url, meta={"bid": bid, "book_id": res['data']['book_id']}, callback=self.parse3)
  35. def parse3(self, response):
  36. res = response.text
  37. res = self.json_encode(res)
  38. if res['code'] == 200:
  39. for chapter in res['data']:
  40. chapter['bid'] = response.meta['bid']
  41. bid = response.meta['book_id']
  42. url = self.base_url + 'method=chapter&bid={}&cid={}'.format(bid, chapter['chapter_id'])
  43. yield scrapy.Request(url, meta=chapter, callback=self.parse4)
  44. def parse4(self, response):
  45. res = response.text
  46. res = self.json_encode(res)
  47. if res['code'] == 200:
  48. mysql = msyqlHelper()
  49. meta = response.meta
  50. data = dict()
  51. data['bid'] = meta['bid']
  52. data['name'] = meta['chapter_name']
  53. data['sequence'] = meta['chapter_order_number'] + 1
  54. data['size'] = len(res['data']['chapter_content'])
  55. data['is_vip'] = meta['chapter_need_pay']
  56. data['prev_cid'] = 0
  57. data['next_cid'] = 0
  58. data['recent_update_at'] = time.strftime("%Y-%m-%d %H:%M:%S",
  59. time.localtime(meta['chapter_last_update_time']))
  60. data['content'] = res['data']['chapter_content']
  61. data['ly_chapter_id'] = res['data']['chapter_id']
  62. mysql.inseraAll(data)
  63. mysql.close()
  64. def json_encode(self, jsonstr):
  65. return json.loads(jsonstr)