[豆瓣]電影評分系統(三)

[豆瓣]電影評分系統(三)

來自專欄:Antenna的python學習筆記 · 1 人贊了文章

原作者:Charles的皮卡丘(微信公眾號)

一、項目目的

(二)已經實現了「讓機器學會分析不同電影的評論,然後根據評論,對所評論的電影進行打分(5分制)」。具體實現主要分為兩部分:用Python寫爬蟲代碼爬取豆瓣的電影評論以及相應評分作為訓練數據,利用神經網路對獲得的數據進行學習以獲得相應的模型。

本次的目的主要是:

模型優化+界面可視化

二、總體思路

將原先的一維詞向量轉化為二維,利用含三層卷積的簡易分類網路(因為此評分問題的本質還是分類問題)對轉化後的數據進行訓練從而獲得相應的豆瓣影評自動評分模型。

三、源碼

import numpy as np
import jieba
import pickle
from keras.models import load_model
from tkinter import *
from PIL import Image, ImageTk
from keras import backend as K
import os
import threading


# ------------------------------------------------------------------------
# PART 1: scoring a comment
# ------------------------------------------------------------------------

# Punctuation removed from a comment before word segmentation.
# NOTE(review): the published listing lost its string quotes and had its
# punctuation normalised, so this set is a reconstruction that accepts both
# the full-width and the ASCII form of each symbol -- confirm against the
# character set used when the model was trained.
_SYMBOLS = (
    ",。!?;:、~¥…—"
    "“”‘’「」『』《》【】()|"
    "~!@#$%^&*()+=[]{}|;:\",.<>/?\\-"
    "\n"
)
_STRIP_TABLE = str.maketrans("", "", _SYMBOLS)

# The flat bag-of-words vector is folded into this 2-D grid for the
# convolutional model; rows * cols must equal the vocabulary size.
_INPUT_ROWS = 8
_INPUT_COLS = 1943


class Film_Commemt_Score():
    """Predict a 1-5 star rating from the text of a film comment."""

    def __init__(self):
        pass

    def filterword(self, filterdata):
        """Return *filterdata* with punctuation removed and edge spaces stripped.

        The earlier loop called ``replace`` on the untouched argument for
        every symbol, so only the last symbol was ever removed; a single
        ``translate`` pass removes all of them.
        """
        self.filterdata = filterdata.translate(_STRIP_TABLE).strip(' ')
        return self.filterdata

    def Get_Score(self, comment_text):
        """Return the model's prediction for *comment_text*.

        Loads ``douban_predict_score.h5`` and ``voca.p`` from the current
        working directory on every call, turns the comment into a
        word-count vector over the vocabulary, reshapes it to
        ``(1, 8, 1943, 1)`` and returns whatever ``model.predict`` yields
        for that batch.
        """
        db_model = load_model('douban_predict_score.h5')
        # SECURITY: pickle executes arbitrary code on load; only use a
        # vocabulary file that you produced yourself.
        with open('voca.p', 'rb') as f:
            voca = pickle.load(f)

        # Clean the text, then segment it into words with jieba.
        words = jieba.lcut(self.filterword(str(comment_text)))

        # Map each vocabulary word to its first position once, instead of
        # scanning the list with ``in`` + ``.index`` for every word.
        # Assumes the vocabulary is a sequence of hashable words (the
        # original code called ``.index`` on it).
        first_position = {}
        for position, word in enumerate(voca):
            first_position.setdefault(word, position)

        counts = [0] * len(voca)
        for word in words:
            position = first_position.get(word)
            if position is not None:
                counts[position] += 1

        # One sample, 8 x 1943 grid, one channel.
        batch = np.array(counts).reshape(1, _INPUT_ROWS, _INPUT_COLS, 1)
        self.comment_score = db_model.predict(batch)
        return self.comment_score


# ------------------------------------------------------------------------
# PART 2: demo window
# ------------------------------------------------------------------------

# Window setup
root = Tk()
root.title('豆瓣電影評分系統')
root.resizable(False, False)
root.geometry('500x400+400+120')

# Background image
# NOTE(review): the listing shows "rbg1_demo.png" with its quotes stripped;
# read here as the raw string r'bg1_demo.png' -- confirm the real file name.
image_path = r'bg1_demo.png'
bg = Image.open(image_path)
bgimg = ImageTk.PhotoImage(bg)
lb_bgimg = Label(root, image=bgimg)
lb_bgimg.grid()


# Layout helpers
def score_records(tips):
    """Insert *tips* as a highlighted line at the top of the result box."""
    txt_show_score.mark_set("here", "1.0")
    txt_show_score.tag_config("tag1", background="black", foreground="white")
    txt_show_score.insert("here", tips, "tag1")
    txt_show_score.insert("here", '\n')


class Get_Score_Thread(threading.Thread):
    """Worker thread that scores the current comment without blocking the UI."""

    def __init__(self, *args, **kwargs):
        super(Get_Score_Thread, self).__init__(*args, **kwargs)
        self.__running = threading.Event()
        self.__running.set()

    def run(self):
        # The flag is cleared at the end of the first pass, so the body
        # runs exactly once per thread.
        while self.__running.is_set():
            prediction = Film_Commemt_Score().Get_Score(
                txt_comment.get("0.0", "end"))
            # Index of the largest output, shifted so ratings start at 1.
            self.score = int(np.argmax(prediction[0])) + 1
            score_records("此評論相當於評分為:%d" % self.score)
            # Drop the Keras session created by this thread's load_model call.
            K.clear_session()
            # Uncomment the two lines below to have Windows read the score aloud.
            # content = "此評論相當於評分為%d分" % self.score
            # _ = os.system('mshta vbscript:createobject("sapi.spvoice").speak("%s")(window.close)' % content)
            self.__running.clear()


def Get_Comment_Score():
    """Button callback: start a scoring thread for the entered comment."""
    t_get_store = Get_Score_Thread()
    t_get_store.start()


lb_comment = Label(root, text='請輸入您對電影的評價:', font=('楷體', 12))
lb_comment.place(relx=0.20, rely=0.10, anchor=CENTER)
txt_comment = Text(root, bd=3, width=25, height=5, font=('楷體', 10))
txt_comment.place(relx=0.21, rely=0.22, anchor=CENTER)
button_getscore = Button(root, text='獲得評分', bd=5, width=12, height=2,
                         command=Get_Comment_Score, font=('楷體', 12),
                         bg='brown')
button_getscore.place(relx=0.22, rely=0.44, anchor=CENTER)
lb_getscore = Label(root, text='評分結果:', font=('楷體', 12))
lb_getscore.place(relx=0.11, rely=0.58, anchor=CENTER)
txt_show_score = Text(root, bd=3, width=25, height=5, font=('楷體', 10))
# Swallow key presses so the result box is read-only for the user.
txt_show_score.bind("<KeyPress>", lambda e: "break")
txt_show_score.place(relx=0.21, rely=0.70, anchor=CENTER)
root.mainloop()


嚴格來說,訓練數據量少,獲得的模型就較差,只有大量的訓練數據才能保證實用性。本例僅供學習和娛樂。

我對深度學習還不太了解,待以後再深入學習。


一個小彩蛋:

# Script: download Baidu image-search results for a keyword.
import os
import sys
import itertools
import urllib
from urllib.parse import quote
import re

# ``requests`` is imported inside the functions that perform network I/O so
# that the pure URL helpers below can be imported without it installed.

# Multi-character tokens used in the obfuscated objURL values.
str_table = {
    '_z2C$q': ':',
    '_z&e3B': '.',
    'AzdH3F': '/',
}

# Single-character substitution table used in the obfuscated objURL values.
char_table = {
    'w': 'a', 'k': 'b', 'v': 'c', '1': 'd', 'j': 'e', 'u': 'f',
    '2': 'g', 'i': 'h', 't': 'i', '3': 'j', 'h': 'k', 's': 'l',
    '4': 'm', 'g': 'n', '5': 'o', 'r': 'p', 'q': 'q', '6': 'r',
    'f': 's', 'p': 't', '7': 'u', 'e': 'v', 'o': 'w', '8': '1',
    'd': '2', 'n': '3', '9': '4', 'c': '5', 'm': '6', '0': '7',
    'b': '8', 'l': '9', 'a': '0',
}
# str.translate needs a mapping keyed by code point.
char_table = {ord(key): ord(value) for key, value in char_table.items()}

_OBJ_URL_RE = re.compile(r'"objURL":"(.*?)"')


def Build_Urls(keyword):
    """Return an endless generator of result-page URLs for *keyword*.

    The keyword is percent-encoded and the ``pn`` offset advances by 60 per
    page (0, 60, 120, ...); the caller decides when to stop iterating.
    """
    keyword = quote(keyword)
    # NOTE(review): "istype=2nc=1" looks like a missing "&" but is kept
    # exactly as published -- confirm against the live endpoint before
    # changing it.
    url = r"http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8&st=-1&ic=0&word={word}&face=0&istype=2nc=1&pn={pn}&rn=60"
    urls = (url.format(word=keyword, pn=i)
            for i in itertools.count(start=0, step=60))
    return urls


def decode_objURL(url):
    """Decode one obfuscated objURL value into a plain URL string."""
    # Multi-character tokens first, then the per-character substitution.
    for key, value in str_table.items():
        url = url.replace(key, value)
    return url.translate(char_table)


def Get_ImgUrl(html):
    """Return the decoded image URLs found in the response text *html*."""
    return [decode_objURL(x) for x in _OBJ_URL_RE.findall(html)]


def downImage(imgUrl, dirpath, imgName):
    """Download *imgUrl* to ``dirpath/imgName``.

    Returns True when the file was written and False when the request
    failed. HTTP error responses count as failures, so an error page is no
    longer saved as a ``.jpg`` and counted as a download.
    """
    import requests

    filename = os.path.join(dirpath, imgName)
    try:
        res = requests.get(imgUrl, timeout=15)
        res.raise_for_status()
    except (requests.RequestException, ValueError):
        # Best effort: report the bad URL and let the caller move on.
        print("[異常:]", imgUrl)
        return False
    with open(filename, 'wb') as f:
        f.write(res.content)
    return True


def main():
    """Prompt for a keyword and a count, then download that many images."""
    import requests

    print("*" * 55)
    print('腳本功能:<自動下載百度圖片搜索結果>')
    print('下載結果保存在腳本所在目錄下的pictures文件夾中')
    print('目前只支持單個關鍵詞搜索,輸入後按回車即可自動下載')
    print("*" * 55)
    keyword = input("請輸入你要下載的圖片關鍵詞:\n")
    try:
        IMG_NUM = int(input('請輸入你要下載的圖片數量:\n'))
    except ValueError:
        # A non-numeric answer means "download nothing".
        IMG_NUM = 0

    # Create the output folder if it does not exist yet.
    dirpath = './pictures'
    os.makedirs(dirpath, exist_ok=True)

    index = 0
    for url in Build_Urls(keyword):
        if index >= IMG_NUM:
            break
        print("[GET_URL]:", url)
        html = requests.get(url, timeout=10).content.decode('utf-8', 'replace')
        imgUrls = Get_ImgUrl(html)
        # An empty page means there are no more results.
        if not imgUrls:
            break
        for imgUrl in imgUrls:
            pic_name = "%s%s.jpg" % (str(keyword), str(index))
            if downImage(imgUrl, dirpath, pic_name):
                index += 1
                print("已下載%s張" % index)
            if index >= IMG_NUM:
                break


if __name__ == '__main__':
    main()


推薦閱讀:

蘋果iPhoneX第二代或將增加512GB版本和支持手寫筆
蘋果建議Facebook下架其VPN應用程序,以保護用戶隱私
國美互聯網怎麼樣,HR去國美互聯網發展如何?
建築城市 智創未來-尋找 「未來領袖」·機器人產業博士
凈水器的水能直接喝嗎?凈水器的使用方法是什麼?

TAG:電影評分 | 豆瓣 | 科技 |