judian.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. # -*- coding: utf-8 -*-
  2. from content_spider.baseSpider import baseSpider
  3. from content_spider.baseSpider import baseUpdateSpider
  4. from content_spider.baseSpider import fixChapterSpider
  5. from content_spider.baseSpider import baseUpdateBookStatusSpider
  6. from content_spider.Util import md5
  7. import time
  8. import json
  9. name = 'judian'
  10. allowed_domains = ['www.judianbook.com']
  11. source = 'zy_judian'
  12. source_name = 'judian据点'
  13. source_id = 30
  14. base_url = 'http://www.judianbook.com/index/api/{}?token=d5957e72f7711fd001ea29ddf0cff977e58c6342'
  15. category = [{'type': 9, 'name': '武侠', 'channel_id': 1, 'category_name': '武侠仙侠', 'category_id': 21},
  16. {'type': 10, 'name': '仙侠', 'channel_id': 1, 'category_name': '武侠仙侠', 'category_id': 21},
  17. {'type': 11, 'name': '都市', 'channel_id': 1, 'category_name': '都市爱情', 'category_id': 54},
  18. {'type': 13, 'name': '历史', 'channel_id': 1, 'category_name': '特种军旅', 'category_id': 51},
  19. {'type': 14, 'name': '游戏', 'channel_id': 1, 'category_name': '游戏竞技', 'category_id': 19},
  20. {'type': 21, 'name': '玄幻', 'channel_id': 1, 'category_name': '玄幻奇幻', 'category_id': 23},
  21. {'type': 38, 'name': '科幻', 'channel_id': 1, 'category_name': '玄幻奇幻', 'category_id': 23},
  22. {'type': 44, 'name': '同人', 'channel_id': 1, 'category_name': '衍生同人', 'category_id': 125},
  23. {'type': 49, 'name': '短篇男', 'channel_id': 1, 'category_name': '其他作品', 'category_id': 127},
  24. {'type': 2, 'name': '古代言情', 'channel_id': 2, 'category_name': '穿越重生', 'category_id': 83},
  25. {'type': 3, 'name': '现代言情', 'channel_id': 2, 'category_name': '婚恋情感', 'category_id': 98},
  26. {'type': 4, 'name': '校 园言情', 'channel_id': 2, 'category_name': '青春校园', 'category_id': 104},
  27. {'type': 5, 'name': '幻想言情', 'channel_id': 2, 'category_name': '东方玄幻', 'category_id': 96},
  28. {'type': 22, 'name': '穿越重生', 'channel_id': 2, 'category_name': '穿越重生', 'category_id': 83},
  29. {'type': 45, 'name': '耽美', 'channel_id': 2, 'category_name': '耽美同人', 'category_id': 106},
  30. {'type': 46, 'name': '百合', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  31. {'type': 47, 'name': '无cp', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  32. {'type': 48, 'name': '短篇', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  33. {'type': 20, 'name': '推理悬念', 'channel_id': 1, 'category_name': '灵异恐 怖', 'category_id': 81},
  34. {'type': 40, 'name': '恐怖惊悚', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
  35. {'type': 41, 'name': '灵异鬼怪', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
  36. {'type': 42, 'name': '风水秘术', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
  37. {'type': 43, 'name': '探险异闻', 'channel_id': 1, 'category_name': '灵异恐怖', 'category_id': 81},
  38. {'type': 24, 'name': '励志', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  39. {'type': 25, 'name': '职场', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  40. {'type': 26, 'name': '生活', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  41. {'type': 28, 'name': ' 婚姻', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  42. {'type': 29, 'name': '教育', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  43. {'type': 35, 'name': '心理学', 'channel_id': 2, 'category_name': '其他', 'category_id': 107},
  44. {'type': 36, 'name': '管理', 'channel_id': 2, 'category_name': '其他', 'category_id': 107}]
  45. def get_category(type):
  46. for item in category:
  47. if int(type) == item['type']:
  48. return item
  49. return category[0]
  50. class judianProcess(object):
  51. name = name
  52. allowed_domains = allowed_domains
  53. source = source
  54. source_name = source_name
  55. source_id = source_id
  56. def get_start_url(self):
  57. return base_url.format('BookList') + '&site=500'
  58. def bid_list_result(self, response):
  59. result = json.loads(response.text)
  60. if result is None or result.get('data') is None:
  61. return []
  62. result_list = []
  63. for item in result['data']:
  64. result_list.append({'id': item['bookid']})
  65. return result_list
  66. def get_book_info_url(self, bid):
  67. return base_url.format('BookInfo') + '&book_id={}'.format(bid)
  68. def book_info_result(self, response):
  69. result = json.loads(response.text)
  70. result = result['data']
  71. category_info = get_category(result['type']);
  72. return {
  73. 'bid': result['book_id'], 'name': result['book_name'], 'author': result['author'],
  74. 'intro': result['describe'], 'cover': result['cover'], 'keyword': result['tag'],
  75. 'status':result['state'], 'category': category_info['category_name'],'category_id':category_info['category_id'],
  76. 'channel': category_info['channel_id']
  77. }
  78. def get_chapter_list_url(self, bid):
  79. return base_url.format('BookChapters') + '&book_id={}&size=3000'.format(bid)
  80. def chapter_list_result(self, response):
  81. result = json.loads(response.text)
  82. if result is None or result.get('data') is None:
  83. return []
  84. result_list = []
  85. i = 0
  86. for chapter_item in result['data']:
  87. i = i+1
  88. result_list.append({
  89. 'source_chapter_id': chapter_item['chapnum'], 'name': chapter_item['name'],
  90. 'sequence': chapter_item['chapnum'], 'is_vip':chapter_item['is_pay'],
  91. 'size': chapter_item['word_count'], 'recent_update_at': ''
  92. })
  93. return result_list
  94. def get_chapter_content_url(self, bid, cid):
  95. return base_url.format('BookChapterInfo') + '&book_id={}&chapnum={}&type=json'.format(bid, cid)
  96. def chapter_content_result(self, response):
  97. result = json.loads(response.text)
  98. if result is None:
  99. return {'content': ''}
  100. return {
  101. 'content': "\r\n".join(result['content'])
  102. }
  103. class judianSpider(judianProcess,baseSpider):
  104. name = name
  105. custom_settings = {
  106. 'DOWNLOAD_DELAY': 0.1,
  107. 'SOURCE': source,
  108. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  109. }
  110. class judianUpdateSpider(judianProcess,baseUpdateSpider):
  111. name = name + "update"
  112. custom_settings = {
  113. 'DOWNLOAD_DELAY': 0.1,
  114. 'SOURCE': source,
  115. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  116. }
  117. class judianFixSpider(judianProcess,fixChapterSpider):
  118. name = name + 'fix'
  119. custom_settings = {
  120. 'DOWNLOAD_DELAY': 0.1,
  121. 'SOURCE': source,
  122. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  123. }
  124. class judianBookInfoSpider(judianProcess,baseUpdateBookStatusSpider):
  125. name = name + "bookinfo"
  126. custom_settings = {
  127. 'DOWNLOAD_DELAY': 0.1,
  128. 'SOURCE': source,
  129. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  130. }