bookFix.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # -*- coding: utf-8 -*-
  2. from content_spider.baseSpider import fixChapterSpider
  3. import json
  4. import time
  5. class bookFixSpider(fixChapterSpider):
  6. name = 'kanshufix'
  7. allowed_domains = ['hezuo.lunjian.com']
  8. source = 'zy_kanshu'
  9. source_name = '看书'
  10. source_id = 19
  11. base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
  12. custom_settings = {
  13. 'DOWNLOAD_DELAY': 0.01,
  14. 'SOURCE': source,
  15. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  16. }
  17. def get_chapter_list_url(self, bid):
  18. return self.base_url.format('ChapterLists') + '&bookid={}'.format(bid)
  19. def chapter_list_result(self, response):
  20. result = json.loads(response.text)
  21. if result is None or result.get('data') is None:
  22. return []
  23. result_list = []
  24. i = 0
  25. for chapter_item in result['data']:
  26. i = i+1
  27. result_list.append({
  28. 'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
  29. 'sequence': i, 'is_vip': 1 if chapter_item['isVip'] else 0,
  30. 'size': 0, 'recent_update_at': chapter_item['lastUpdateTime']
  31. })
  32. return result_list
  33. def get_chapter_content_url(self, bid, cid):
  34. return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)
  35. def chapter_content_result(self, response):
  36. result = json.loads(response.text)
  37. if result is None:
  38. return {'content': ''}
  39. return {
  40. 'content': result['data']['content'],
  41. 'size': len(result['data']['content'])
  42. }