simpleSpider.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # -*- coding: utf-8 -*-
import json
import time

from content_spider.baseSpider import baseSpider
from content_spider.baseSpider import baseUpdateSpider
from content_spider.baseSpider import fixChapterSpider
from content_spider.baseSpider import baseUpdateBookStatusSpider
  7. class simpleProcess(object):
  8. name = ''
  9. allowed_domains = []
  10. source = ''
  11. source_name = ''
  12. source_id = ''
  13. def get_start_url(self):
  14. raise NotImplementedError
  15. def bid_list_result(self, response):
  16. result = json.loads(response.text)
  17. if result is None or result.get('data') is None:
  18. return []
  19. result_list = []
  20. for item in result['data']:
  21. result_list.append({'id': item['id']})
  22. return result_list
  23. def get_book_info_url(self, bid):
  24. raise NotImplementedError
  25. def book_info_result(self, response):
  26. raise NotImplementedError
  27. def get_chapter_list_url(self, bid):
  28. raise NotImplementedError
  29. def chapter_list_result(self, response):
  30. raise NotImplementedError
  31. def get_chapter_content_url(self, bid, cid):
  32. raise NotImplementedError
  33. def chapter_content_result(self, response):
  34. raise NotImplementedError
  35. class simpleProcessSpider(simpleProcess, baseSpider):
  36. name = simpleProcess.name
  37. custom_settings = {
  38. 'DOWNLOAD_DELAY': 0.1,
  39. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  40. }
  41. class simpleProcessupdateSpider(simpleProcess, baseUpdateSpider):
  42. name = simpleProcess.name + 'update'
  43. custom_settings = {
  44. 'DOWNLOAD_DELAY': 0.1,
  45. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  46. }
  47. class simpleProcessfixSpider(simpleProcess, fixChapterSpider):
  48. name = simpleProcess.name +'fix'
  49. custom_settings = {
  50. 'DOWNLOAD_DELAY': 0.1,
  51. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  52. }
  53. class simpleProcessBookInfoSpider(simpleProcess, baseUpdateBookStatusSpider):
  54. name = simpleProcess.name +'bookstatusinfo'
  55. custom_settings = {
  56. 'DOWNLOAD_DELAY': 0.1,
  57. 'LOG_FILE': 'content_spider/log/' + name + time.strftime("%Y-%m-%d", time.localtime()) + '.log'
  58. }