baichuanzw.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # -*- coding: utf-8 -*-
  2. import time
  3. from content_spider.baseSpider import baseSpider
  4. from content_spider.pipelines import formatcontent
  5. from content_spider.baseSpider import baseUpdateSpider
  6. from content_spider.baseSpider import fixChapterSpider
  7. from content_spider.baseSpider import baseUpdateBookStatusSpider
  8. import json
  9. from content_spider.Util import md5
  10. name = 'baichuanzw'
  11. allowed_domains = ['fenxiao.baichuanzw.com']
  12. source = 'zy_baichuan'
  13. source_name = 'baichuan百川'
  14. source_id = 27
  15. client_id = '37'
  16. key = 'ee09b1cd3fabc8f985dc9cb98e6214ab'
  17. base_url = 'http://fenxiao.baichuanzw.com/api/baichuan/{}?client_id='+client_id+'&sign={}'
  18. category = [
  19. {'cate_id': '100001', 'cate_name': '原创', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 23},
  20. {'cate_id': '100004', 'cate_name': '玄幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 23},
  21. {'cate_id': '100005', 'cate_name': '仙侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 21},
  22. {'cate_id': '100006', 'cate_name': '军事', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史','ncategory_id': 51},
  23. {'cate_id': '100007', 'cate_name': '历史', 'channel_id': 1, 'category_id': 4, 'category_name': '军事历史','ncategory_id': 14},
  24. {'cate_id': '100008', 'cate_name': '都市', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 54},
  25. {'cate_id': '100015', 'cate_name': '奇幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 22},
  26. {'cate_id': '100016', 'cate_name': '游戏', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技','ncategory_id': 19},
  27. {'cate_id': '100017', 'cate_name': '竞技', 'channel_id': 1, 'category_id': 7, 'category_name': '游戏竞技','ncategory_id': 19},
  28. {'cate_id': '100018', 'cate_name': '灵异', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异','ncategory_id': 81},
  29. {'cate_id': '100019', 'cate_name': '科幻', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 22},
  30. {'cate_id': '100021', 'cate_name': '武侠', 'channel_id': 1, 'category_id': 1, 'category_name': '玄幻仙侠','ncategory_id': 21},
  31. {'cate_id': '100026', 'cate_name': '官场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 55},
  32. {'cate_id': '100027', 'cate_name': '商场', 'channel_id': 1, 'category_id': 3, 'category_name': '都市暧昧','ncategory_id': 55},
  33. {'cate_id': '100043', 'cate_name': '悬疑', 'channel_id': 1, 'category_id': 8, 'category_name': '悬疑灵异','ncategory_id': 81},
  34. {'cate_id': '100002', 'cate_name': '女频', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
  35. {'cate_id': '100010', 'cate_name': '古言', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
  36. {'cate_id': '100011', 'cate_name': '现言', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
  37. {'cate_id': '100012', 'cate_name': '幻言', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情','ncategory_id': 110},
  38. {'cate_id': '100013', 'cate_name': '女生仙侠', 'channel_id': 2, 'category_id': 93, 'category_name': '幻想言情','ncategory_id': 97},
  39. {'cate_id': '100014', 'cate_name': '青春', 'channel_id': 2, 'category_id': 101, 'category_name': '浪漫青春','ncategory_id': 103},
  40. {'cate_id': '100020', 'cate_name': '穿越', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
  41. {'cate_id': '100022', 'cate_name': '同人', 'channel_id': 2, 'category_id': 102, 'category_name': '耽美同人','ncategory_id': 106},
  42. {'cate_id': '100023', 'cate_name': '女生灵异', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异','ncategory_id': 95},
  43. {'cate_id': '100024', 'cate_name': '重生', 'channel_id': 2, 'category_id': 82, 'category_name': '古代言情','ncategory_id': 83},
  44. {'cate_id': '100025', 'cate_name': '婚恋', 'channel_id': 2, 'category_id': 87, 'category_name': '现代言情','ncategory_id': 98},
  45. {'cate_id': '100031', 'cate_name': '女生悬疑', 'channel_id': 2, 'category_id': 92, 'category_name': '女生灵异','ncategory_id': 95},
  46. ]
  47. def get_category(cate_id):
  48. for item in category:
  49. if str(item['cate_id']) == cate_id:
  50. return item
  51. return category[0]
  52. class baichuanzw(object):
  53. name = name
  54. allowed_domains = allowed_domains
  55. source = source
  56. source_name = source_name
  57. source_id = source_id
  58. def get_start_url(self):
  59. sign = md5('{}{}'.format(client_id, key))
  60. return base_url.format('getBookList', sign)
  61. def bid_list_result(self, response):
  62. result = json.loads(response.text)
  63. if result is None or result.get('data') is None:
  64. return []
  65. result_list = []
  66. for item in result['data']:
  67. result_list.append({'id': item['id']})
  68. return result_list
  69. def get_book_info_url(self, bid):
  70. sign = md5('{}{}{}'.format(client_id, key, bid))
  71. return base_url.format('getBookInfo', sign) + '&book_id={}'.format(bid)
  72. def book_info_result(self, response):
  73. result = json.loads(response.text)
  74. result = result['data']
  75. category_info = get_category(result['category'])
  76. return {
  77. 'bid': result['id'], 'name': result['name'], 'author': result['author'],
  78. 'intro': result['brief'], 'cover': result['cover'], 'keyword': result['tag'],
  79. 'status': result['complete_status'],
  80. 'category': category_info['category_name'],'category_id':category_info['ncategory_id'],
  81. 'channel': category_info['channel_id']
  82. }
  83. def get_chapter_list_url(self, bid):
  84. sign = md5('{}{}{}'.format(client_id, key, bid))
  85. return base_url.format('getVolumeList', sign) + '&book_id={}'.format(bid)
  86. def chapter_list_result(self, response):
  87. result = json.loads(response.text)
  88. if result is None or result.get('data') is None:
  89. return []
  90. result_list = []
  91. i = 0
  92. for volumeList in result['data']:
  93. for chapter_item in volumeList['chapterlist']:
  94. i = i+1
  95. result_list.append({
  96. 'source_chapter_id': chapter_item['id'], 'name': chapter_item['name'],
  97. 'sequence': i, 'is_vip': 1 if i >= 20 else 0,
  98. 'size': 0, 'recent_update_at': ''
  99. })
  100. return result_list
  101. def get_chapter_content_url(self, bid, cid):
  102. sign = md5('{}{}{}{}'.format(client_id, key, bid, cid))
  103. return base_url.format('Chapterinfo', sign) + '&book_id={}&chapter_id={}'.format(bid, cid)
  104. def chapter_content_result(self, response):
  105. result = json.loads(response.text)
  106. if result is None:
  107. return {'content': ''}
  108. return {
  109. 'content': result['data']['content'],
  110. 'size': len(result['data']['content'])
  111. }
  112. class baichuanzwSpider(baichuanzw, baseSpider):
  113. name = 'baichuanzw'
  114. custom_settings = {
  115. 'DOWNLOAD_DELAY': 0.1,
  116. 'SOURCE': source,
  117. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  118. }
  119. class baichuanzwupdateSpider(baichuanzw, baseUpdateSpider):
  120. name = 'baichuanzwupdate'
  121. custom_settings = {
  122. 'DOWNLOAD_DELAY': 0.1,
  123. 'SOURCE': source,
  124. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  125. }
  126. class baichuanzwfixSpider(baichuanzw, fixChapterSpider):
  127. name = 'baichuanzwfix'
  128. custom_settings = {
  129. 'DOWNLOAD_DELAY': 0.1,
  130. 'SOURCE': source,
  131. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  132. }
  133. class baichuanzwBookInfoSpider(baichuanzw, baseUpdateBookStatusSpider):
  134. name = 'baichuanzwbookstatusinfo'
  135. custom_settings = {
  136. 'DOWNLOAD_DELAY': 0.1,
  137. 'SOURCE': source,
  138. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  139. }