[R] Draw a wordcloud
# Draw a word cloud of the most frequent Chinese words found in a CSV file.
#
# Pipeline: read the file -> strip URLs -> segment the text with Rwordseg ->
# drop stop words -> count tokens -> keep frequent multi-character words ->
# plot the cloud.

# Load rJava together with Rwordseg (which provides segmentCN()), plus the
# plotting packages. library() is used instead of require() so that a missing
# package stops the script immediately instead of failing later.
library(rJava)
library(Rwordseg)
library(RColorBrewer)
library(wordcloud)

# == Settings ----
input_path <- "G:\\test.txt"
# Whole words that must not appear in the cloud.
stop_words <- c("我", "你", "的", "了", "是", "和", "阳", "创业", "宁阳")
# Words occurring fewer than this many times are dropped.
min_freq <- 10L

# == Read the data ----
lecture <- read.csv(
  input_path,
  sep = ",",
  header = TRUE,
  fileEncoding = "UTF-8",
  stringsAsFactors = FALSE
)

# Show the first rows to spot character-encoding problems early.
print(head(lecture))

# Number of records read.
n <- nrow(lecture)
print(n)

# == Text preprocessing ----
# Flatten every cell into one character vector and drop missing or blank cells.
res <- as.character(unlist(lecture, use.names = FALSE))
res <- res[!is.na(res) & nzchar(trimws(res))]
if (length(res) == 0L) {
  stop("No non-blank text found in ", input_path, call. = FALSE)
}

# Strip URLs (http and https).
res <- gsub("https?://[A-Za-z0-9/._~:?#@!$&'()*+,;=%-]+", "", res)

# == Segmentation and frequency count ----
words <- as.character(unlist(lapply(res, segmentCN), use.names = FALSE))
# Split again on spaces in case a segment still holds several words.
words <- unlist(strsplit(words, " ", fixed = TRUE), use.names = FALSE)

# Remove stop words as whole tokens. A bracket expression such as "[创业|宁阳]"
# would instead delete each listed character wherever it occurs in the text.
words <- words[!(words %in% stop_words)]
if (length(words) == 0L) {
  stop("Segmentation produced no usable words", call. = FALSE)
}

# Count the tokens, most frequent first.
v <- sort(table(words), decreasing = TRUE)

# as.integer() strips the table class; passing the table itself to
# data.frame() would expand it into two columns (freq.Var1 / freq.Freq).
d <- data.frame(
  word = names(v),
  freq = as.integer(v),
  stringsAsFactors = FALSE
)

# Keep words longer than one character that occur at least min_freq times.
d <- d[nchar(d$word) > 1L & d$freq >= min_freq, , drop = FALSE]
if (nrow(d) == 0L) {
  stop("No word reaches the minimum frequency of ", min_freq, call. = FALSE)
}

# == Plot ----
# Light-yellow background and the "Kai" font family for the words.
# NOTE(review): "Kai" must be a family the active graphics device knows
# (e.g. registered via windowsFonts() on Windows) - confirm on the target
# machine.
op <- par(bg = "lightyellow", family = "Kai")
wordcloud(
  d$word,
  d$freq,
  scale = c(5, 0.5),
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2"),
  use.r.layout = FALSE
)
# Restore the previous graphics parameters.
par(op)