bookupdate.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # -*- coding: utf-8 -*-
  2. import random
  3. from content_spider.baseSpider import baseUpdateSpider
  4. from content_spider.baseSpider import baseUpdateBookStatusSpider
  5. import json
  6. import time
  7. class BookupdateSpider(baseUpdateSpider):
  8. name = 'kanshuupdate'
  9. allowed_domains = ['hezuo.lunjian.com']
  10. source = 'zy_kanshu'
  11. source_name = '看书'
  12. source_id = 19
  13. base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
  14. custom_settings = {
  15. 'DOWNLOAD_DELAY': 0.01,
  16. 'SOURCE': source,
  17. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  18. }
  19. def get_chapter_list_url(self, bid):
  20. return self.base_url.format('ChapterLists') + '&bookid={}&random={}'.format(bid,random.randint(1,10000))
  21. def chapter_list_result(self, response):
  22. result = json.loads(response.text)
  23. if result is None or result.get('data') is None:
  24. return []
  25. result_list = []
  26. i = 0
  27. for chapter_item in result['data']:
  28. i = i + 1
  29. result_list.append({
  30. 'source_chapter_id': chapter_item['id'], 'name': chapter_item['title'],
  31. 'sequence': i, 'is_vip': 1 if chapter_item['isVip'] else 0,
  32. 'size': 0, 'recent_update_at': chapter_item['lastUpdateTime']
  33. })
  34. return result_list
  35. def get_chapter_content_url(self, bid, cid):
  36. return self.base_url.format('ChapterContent') + '&bookid={}&chapterid={}'.format(bid, cid)
  37. def chapter_content_result(self, response):
  38. result = json.loads(response.text)
  39. if result is None:
  40. return {'content': ''}
  41. return {
  42. 'content': result['data']['content'],
  43. 'size': len(result['data']['content'])
  44. }
  45. class BookupdateStatusSpider(baseUpdateBookStatusSpider):
  46. name = 'kanshuupdatestatus'
  47. allowed_domains = ['hezuo.lunjian.com']
  48. source = 'zy_kanshu'
  49. source_name = '看书'
  50. source_id = 19
  51. base_url = 'http://hezuo.lunjian.com/open/ksbook/{}?channel_id=10054'
  52. custom_settings = {
  53. 'DOWNLOAD_DELAY': 0.01,
  54. 'SOURCE': source,
  55. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  56. }
  57. def get_book_info_url(self, bid):
  58. return self.base_url.format('BookDetail') + '&bookid={}'.format(bid)
  59. def book_info_result(self, response):
  60. result = json.loads(response.text)
  61. if result is None or result.get('data') is None:
  62. return None
  63. result = result['data']
  64. return {
  65. 'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
  66. 'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
  67. 'status': result['state'], 'category': result['category'],'category_id':1,
  68. 'channel': result['channelId']
  69. }