机器学习实战2--时光网评论分析
# coding: utf-8
# Sentiment analysis of movie reviews with GraphLab Create (notebook export).
#
# Pipeline: load the review CSV, build per-review word counts, label each
# review as positive/negative from its user score, train a logistic
# classifier on the word counts, then rank the reviews of one movie by the
# predicted probability of being positive.
#
# The "# In[N]" markers are the original notebook cell numbers. Bare
# expressions such as `products.head()` only render output when run as the
# last statement of a notebook cell.

# In[74]:

import sys
# Python 2 only: reload() re-exposes setdefaultencoding so the default
# str encoding can be switched to UTF-8 for the non-ASCII review text.
reload(sys)
sys.setdefaultencoding('utf8')
import graphlab
import datetime


# In[75]:

graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)


# In[76]:

products = graphlab.SFrame.read_csv('data/commit.csv')


# In[77]:

products.head()


# In[78]:

# Text-analytics helper: per-review word counts computed from the
# 'commits' column.
products['word_count'] = graphlab.text_analytics.count_words(products['commits'])


# In[79]:

products.head()


# In[80]:

graphlab.canvas.set_target('browser')


# In[81]:

products['movie_name_zh'].show()


# In[82]:

# Select the reviews of the movie '战狼2'. This snapshot is taken BEFORE
# the score filtering/casting below, so its 'userscore' column keeps the
# raw values, including the 'None' placeholder.
giraffe_reviews = products[products['movie_name_zh'] == '战狼2']


# In[83]:

len(giraffe_reviews)  # number of reviews


# In[84]:

# Score distribution, viewed as a categorical column.
# Author's observation: 7 is the most common score for this movie.
giraffe_reviews['userscore'].show(view='Categorical')


# In[88]:

# Drop reviews without a score, cast the score to float, then drop the
# reviews scored exactly 5.
products = products[products['userscore'] != 'None']
products['userscore'] = products['userscore'].astype(float)
products = products[products['userscore'] != 5]


# In[89]:

# Label: a review counts as positive when its score is at least 6.
products['sentiment'] = products['userscore'] >= 6.0


# In[90]:

products.head()


# In[91]:

# Train/test split with a fixed seed so the split is reproducible.
train_data, test_data = products.random_split(.8, seed=0)


# In[92]:

# Logistic-regression classifier on the word counts.
# NOTE(review): test_data is passed as the validation set and is also used
# for evaluation below, so the reported metrics may not come from truly
# held-out data - confirm this is intended.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)


# In[93]:

# Evaluate the model with the ROC (receiver operating characteristic) curve.
sentiment_model.evaluate(test_data, metric='roc_curve')


# In[94]:

# Visualise the evaluation results.
sentiment_model.show(view='Evaluation')


# In[95]:

# New column: the model's predicted sentiment for each review of the
# selected movie, requested as a probability.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(
    giraffe_reviews, output_type='probability')


# In[96]:

giraffe_reviews.head()


# In[97]:

# Sort by predicted sentiment; ascending=False puts the highest first.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)


# In[98]:

giraffe_reviews.head()


# In[110]:

giraffe_reviews[0]


# In[112]:

giraffe_reviews[2]


# In[113]:

giraffe_reviews[-1]


# In[114]:

giraffe_reviews[-2]


# In[117]:

# Keep only the reviews whose raw score is the 'None' placeholder.
giraffe_reviews = giraffe_reviews[giraffe_reviews['userscore'] == 'None']


# In[119]:

giraffe_reviews.tail()


# In[ ]:
代码地址(附作业答案): https://github.com/RedheatWei/aiproject/tree/master/Machine%20Learning%20Specialization/week3
本博客内容都是博主试验通过的方案与方法.
如需交流,请发邮件至qjyyn#qq.com