zhaoyang před 2 roky
rodič
revize
7c0da8ffbf
2 změnil soubory, kde provedl 180 přidání a 21 odebrání
  1. 97 2
      content_spider/spiders/kanshu/book.py
  2. 83 19
      content_spider/temp_test.py

+ 97 - 2
content_spider/spiders/kanshu/book.py

@@ -1,10 +1,105 @@
 # -*- coding: utf-8 -*-
 
 from content_spider.baseSpider import baseSpider
+from content_spider.Util import get_category_by_name
 import json
 import time
 
 
+
+
+category = [{'id': 19, 'name': '斗气升级', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 20, 'name': '元素魔法', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 21, 'name': '血族僵尸', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 22, 'name': '东方玄幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 23, 'name': '西方奇幻', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 24, 'name': '异世大陆', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 101, 'name': '异界征战', 'channel_id': 1, 'category_id': 23, 'category_name': '玄幻奇幻'},
+ {'id': 26, 'name': '传统武侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
+ {'id': 27, 'name': '古典仙侠', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
+ {'id': 29, 'name': '现代修真', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
+ {'id': 30, 'name': '洪荒封神', 'channel_id': 1, 'category_id': 21, 'category_name': '武侠仙侠'},
+ {'id': 14, 'name': '异术超能', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 15, 'name': '都市生活', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 16, 'name': '黑白两道', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 17, 'name': '官场商战', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 18, 'name': '青春校园', 'channel_id': 1, 'category_id': 54, 'category_name': '都市爱情'},
+ {'id': 31, 'name': '架空历史', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
+ {'id': 32, 'name': '军旅战争', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
+ {'id': 102, 'name': '抗战烽火', 'channel_id': 1, 'category_id': 51, 'category_name': '特种军旅'},
+ {'id': 5, 'name': '侦探推理', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
+ {'id': 38, 'name': '灵异惊悚', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
+ {'id': 103, 'name': '未来世界', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
+ {'id': 104, 'name': '古武机甲', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
+ {'id': 105, 'name': '星际时空', 'channel_id': 1, 'category_id': 22, 'category_name': '西方玄幻'},
+ {'id': 25, 'name': '游戏异界', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
+ {'id': 34, 'name': '虚拟网游', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
+ {'id': 35, 'name': '电子竞技', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
+ {'id': 36, 'name': '体育竞技', 'channel_id': 1, 'category_id': 19, 'category_name': '游戏竞技'},
+ {'id': 1000, 'name': '名人传记', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 1002, 'name': '经典名著', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 1004, 'name': '传统文化', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 1006, 'name': '人际社交', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2001, 'name': '科幻未来', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2002, 'name': '衍生同人', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2003, 'name': '古风穿越', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2004, 'name': '魔幻奇幻', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2005, 'name': '游戏竞技', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2006, 'name': '悬疑烧脑', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2007, 'name': '都市幻想', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2008, 'name': '神秘灵异', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2009, 'name': '青春校园', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2010, 'name': '武侠仙侠', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 2020, 'name': '历史军事', 'channel_id': 1, 'category_id': 127, 'category_name': '其他作品'},
+ {'id': 48, 'name': '总裁豪门', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 49, 'name': '职场白领', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 50, 'name': '浪漫言情', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 51, 'name': '婚姻家庭', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 53, 'name': '情感纪实', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 81, 'name': '军婚高干', 'channel_id': 2, 'category_id': 98, 'category_name': '婚恋情感'},
+ {'id': 55, 'name': '花季雨季', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
+ {'id': 56, 'name': '成长励志', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
+ {'id': 57, 'name': '青春伤痛', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
+ {'id': 58, 'name': '校园生活', 'channel_id': 2, 'category_id': 104, 'category_name': '青春校园'},
+ {'id': 59, 'name': '女尊天下', 'channel_id': 2, 'category_id': 123, 'category_name': '女尊王朝'},
+ {'id': 60, 'name': '宫闱情仇', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
+ {'id': 61, 'name': '异国浪漫', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 62, 'name': '宅门世家', 'channel_id': 2, 'category_id': 120, 'category_name': '宫斗宅斗'},
+ {'id': 80, 'name': '穿越言情', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 63, 'name': '仙侣情缘', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 64, 'name': '妖精幻情', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 65, 'name': '奇幻柔情', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 66, 'name': '魔法异能', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 67, 'name': '重生爱恋', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 68, 'name': '反穿时空', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 69, 'name': '古代王朝', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 70, 'name': '架空历史', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 71, 'name': '前世今生', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 120, 'name': '文艺', 'channel_id': 2, 'category_id': 83, 'category_name': '穿越重生'},
+ {'id': 72, 'name': '科幻小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
+ {'id': 73, 'name': '网游小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
+ {'id': 74, 'name': '灵异恐怖', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
+ {'id': 75, 'name': '推理小说', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'},
+ {'id': 76, 'name': '纯爱浪漫', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
+ {'id': 77, 'name': '耽美', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
+ {'id': 78, 'name': '同人', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
+ {'id': 79, 'name': '百合', 'channel_id': 2, 'category_id': 106, 'category_name': '耽美同人'},
+ {'id': 1001, 'name': '名人传记', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 1003, 'name': '经典名著', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 1005, 'name': '传统文化', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 1007, 'name': '人际社交', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 2012, 'name': '衍生言情', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
+ {'id': 2013, 'name': '衍生纯爱', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
+ {'id': 2014, 'name': '武侠仙侠', 'channel_id': 2, 'category_id': 96, 'category_name': '东方玄幻'},
+ {'id': 2015, 'name': '古风历史', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 2016, 'name': '青春恋爱', 'channel_id': 2, 'category_id': 103, 'category_name': '青春纯爱'},
+ {'id': 2017, 'name': '脑洞幻想', 'channel_id': 2, 'category_id': 107, 'category_name': '其他'},
+ {'id': 2018, 'name': '游戏悬疑', 'channel_id': 2, 'category_id': 119, 'category_name': '游戏'}]
+
+
+
+
+
 class BookSpider(baseSpider):
     name = 'kanshu'
     allowed_domains = ['hezuo.lunjian.com']
@@ -39,11 +134,11 @@ class BookSpider(baseSpider):
         if result is None or result.get('data') is None:
             return None
         result = result['data']
-
+        category_info = get_category_by_name(result['category'])
         return {
             'bid': result['id'], 'name': result['bookTitle'], 'author': result['author'],
             'intro': result['introduction'], 'cover': result['cover'], 'keyword': result['labels'],
-            'status': result['state'], 'category': result['category'],'category_id':1,
+            'status': result['state'], 'category': result['category'],'category_id':category_info['id'],
             'channel': result['channelId']
         }
 

+ 83 - 19
content_spider/temp_test.py

@@ -13,29 +13,93 @@ from xml.dom.minidom import parseString
 import time
 import xlrd
 
-#df = xlrd.open_workbook("./11111.xls")
-#table=df.sheets()[0]
+df = xlrd.open_workbook("./1.xls")
+table=df.sheets()[0]
 
 
 result = []
 
-#for i in range(1,table.nrows):
-#    row = table.row_values(i)
-#    if len(row) <= 0:
-#        break
-#    name = row[1]
-#    cid = int(row[0])
-#    item = {"id":cid,"name":name}
-
-
+for i in range(0,table.nrows):
+    row = table.row_values(i)
+    if len(row) <= 0:
+        break
+    c_id = int(row[2])
 
+    name = row[3]
+    item = {"id":c_id,"name":name}
+    if c_id in [19,20,21,22,23,24,101]:
+        item['channel_id'] = 1
+        item['category_id'] = 23
+        item['category_name'] = '玄幻奇幻'
+    if c_id in [26,27,29,30]:
+        item['channel_id'] = 1
+        item['category_id'] = 21
+        item['category_name'] = '武侠仙侠'
+    if c_id in [14,15,16,17,18]:
+        item['channel_id'] = 1
+        item['category_id'] = 54
+        item['category_name'] = '都市爱情'
+    if c_id in [31,32,102]:
+        item['channel_id'] = 1
+        item['category_id'] = 51
+        item['category_name'] = '特种军旅'
+    if c_id in [5,38,103,104,105]:
+        item['channel_id'] = 1
+        item['category_id'] = 22
+        item['category_name'] = '西方玄幻'
+    if c_id in [25,34,35,36]:
+        item['channel_id'] = 1
+        item['category_id'] = 19
+        item['category_name'] = '游戏竞技'
+    if c_id in [1000,1002,1004,1006,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2020]:
+        item['channel_id'] = 1
+        item['category_id'] = 127
+        item['category_name'] = '其他作品'
+    if c_id in [48,49,50,51,53,81]:
+        item['channel_id'] = 2
+        item['category_id'] = 98
+        item['category_name'] = '婚恋情感'
+    if c_id in [55,56,57,58]:
+        item['channel_id'] = 2
+        item['category_id'] = 104
+        item['category_name'] = '青春校园'
+    if c_id in [55,56,57,58]:
+        item['channel_id'] = 2
+        item['category_id'] = 104
+        item['category_name'] = '青春校园'
+    if c_id == 59:
+        item['channel_id'] = 2
+        item['category_id'] = 123
+        item['category_name'] = '女尊王朝'
+    if c_id == 60 or c_id == 62:
+        item['channel_id'] = 2
+        item['category_id'] = 120
+        item['category_name'] = '宫斗宅斗'
+    if c_id == 61 or c_id == 80 or c_id in [68,69,70,71,120]:
+        item['channel_id'] = 2
+        item['category_id'] = 83
+        item['category_name'] = '穿越重生'
+    if c_id in [63,64,65,66,67,2014]:
+        item['channel_id'] = 2
+        item['category_id'] = 96
+        item['category_name'] = '东方玄幻'
+    if c_id in [72,73,74,75,2018]:
+        item['channel_id'] = 2
+        item['category_id'] = 119
+        item['category_name'] = '游戏'
+    if c_id in [76,77,78,79]:
+        item['channel_id'] = 2
+        item['category_id'] = 106
+        item['category_name'] = '耽美同人'
+    if c_id == 2012 or c_id == 2013 or c_id == 2016:
+        item['channel_id'] = 2
+        item['category_id'] = 103
+        item['category_name'] = '青春纯爱'
+    if c_id in [1001,1003,1005,1007,2015,2017]:
+        item['channel_id'] = 2
+        item['category_id'] = 107
+        item['category_name'] = '其他'
+    result.append(item)
 
-category = [
-    ""
-];
 
-for item in category:
-    if item['sid'] == "1":
-        item['channel_id'] = 1
-    if item['sid'] == "2":
-        item['channel_id'] = 2
+print(result)