book.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. # -*- coding: utf-8 -*-
  2. from content_spider.baseSpider import baseSpider
  3. import json
  4. import time
  5. class BookSpider(baseSpider):
  6. name = 'kanshu'
  7. allowed_domains = ['hezuo.lunjian.com']
  8. source = 'zy_kanshu'
  9. source_name = '看书'
  10. source_id = 19
  11. base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
  12. custom_settings = {
  13. 'DOWNLOAD_DELAY': 0.01,
  14. 'SOURCE': source,
  15. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  16. }
  17. def get_start_url(self):
  18. return self.base_url.format('bookLists')
  19. def bid_list_result(self, response):
  20. result = json.loads(response.text)
  21. if result is None:
  22. return []
  23. result_list = []
  24. for item in result['data']:
  25. result_list.append({'id': item['id']})
  26. return result_list
  27. def get_book_info_url(self, bid):
  28. return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)
  29. def book_info_result(self, response):
  30. result = json.loads(response.text)
  31. if result is None or result.get('data') is None:
  32. return None
  33. result = result['data']
  34. return {
  35. 'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
  36. 'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
  37. 'status': result['state'], 'category': result['category'],'category_id':1,
  38. 'channel': result['channelId']
  39. }
  40. def get_chapter_list_url(self, bid):
  41. return self.base_url.format('ChapterLists') + '&bookid={}'.format(bid)
  42. def chapter_list_result(self, response):
  43. result = json.loads(response.text)
  44. if result is None or result.get('data') is None:
  45. return []
  46. result_list = []
  47. i = 0
  48. for chapter_item in result['data']:
  49. i = i+1
  50. result_list.append({
  51. 'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
  52. 'sequence': i, 'is_vip': 1 if chapter_item['isVip'] else 0,
  53. 'size': 0, 'recent_update_at': chapter_item['lastUpdateTime']
  54. })
  55. return result_list
  56. def get_chapter_content_url(self, bid, cid):
  57. return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)
  58. def chapter_content_result(self, response):
  59. result = json.loads(response.text)
  60. if result is None:
  61. return {'content': ''}
  62. return {
  63. 'content': result['data']['content'],
  64. 'size': len(result['data']['content'])
  65. }