19级课堂测试基于中文新闻分词绘制词云图
19级课堂测试基于中文新闻分词绘制词云图
要求
1、数据导入展示:
将所有类别的新闻导入数据库中。以树形目录形式展示新闻类别,每个树形节点代表一个新闻分类,括号中的数字代表该类新闻的数量;选择某个新闻类别后,以列表形式显示该类别下的新闻标题,点击新闻标题,可以查看详细信息。
2、文章统计:
统计各个类别的文章总数,以柱状图表示。
3、文章分词:
使用中文分词算法解析所有新闻正文,并统计每个词语出现的数量,并以词云图的方式展示,点击词云图中该词语,显示其数量。
4、选择某一类文章,绘制该类型新闻文章词云图,例如汽车类新闻词云图
我实现的
- 数据导入展示
- 文章统计
- 选择某一类文章,绘制该类型新闻文章词云图,例如汽车类新闻词云图
没有做总的词云图,懒得做了
代码实现
数据源文件样式
数据处理代码
源文件导入数据库
import pandas as pd
import sqlalchemy as sqla
import jieba.analyse
# Load the whole workbook. The result is iterated below as a mapping of
# sheet name -> DataFrame (one entry per sheet).
data = pd.read_excel(r'D:\腾讯软件\815049548\FileRecv\1617241934831197.xlsx', sheet_name=None)
# Engine for the target MySQL database.
db = sqla.create_engine('mysql+pymysql://root:123456@localhost/database?charset=utf8')
# Write every sheet into the single `sheet` table.
for frame in data.values():
    # if_exists='append' adds the rows to the table instead of replacing it.
    frame.to_sql('sheet', db, if_exists='append')
结巴分词并导入数据库
import pandas as pd
import sqlalchemy as sqla
import jieba.analyse
# Load the whole workbook. The result is iterated below as a mapping of
# sheet name -> DataFrame; the sheet name is stored as the channel of each keyword.
data = pd.read_excel(r'D:\腾讯软件\815049548\FileRecv\1617241934831197.xlsx', sheet_name=None)
db = sqla.create_engine('mysql+pymysql://root:123456@localhost/database?charset=utf8')
for key, sheet in data.items():
    # One small frame per article; they are concatenated once per sheet.
    # (DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every call.)
    frames = []
    for text in sheet['content']:
        # jieba.analyse extracts the keywords of one article body.
        keywords = jieba.analyse.extract_tags(str(text))
        # col_3 holds the channel (sheet name), col_2 the keyword.
        frames.append(pd.DataFrame({'col_3': [key] * len(keywords), 'col_2': keywords}))
    if frames:
        # Plain concat keeps each article's own 0..n-1 index, which to_sql
        # writes out as the `index` column, exactly as before.
        insert = pd.concat(frames)
    else:
        # Sheet without any rows: still write an (empty) frame with both columns.
        insert = pd.DataFrame({'col_3': [], 'col_2': []})
    insert.to_sql('wordTwo', db, if_exists='append')
统计分词结果
这条 SQL 是凭印象写的,可能有一点出入(狗头)
-- Count how often each keyword occurs within each channel and store the result,
-- most frequent first. channel is part of the GROUP BY so counts are kept per
-- channel, and the ORDER BY uses the `value` alias defined in the select list.
create table allresult as select count(`key`) as `value`,`key`,channel from word group by `key`,channel order by `value` desc
编写后端代码
因为没有涉及什么复杂操作,只是基本增删改查,所以在这里只贴部分源码
pojo
package com.murphy.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One news article as selected from the {@code sheet} table by {@code NewsMapper}.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by the Lombok annotations below. The all-args
 * constructor takes the fields in declaration order, so do not reorder them.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class News {
// Row index column. NOTE(review): presumably the DataFrame index written by
// pandas to_sql during import — confirm against the table schema.
private String index;
// Article headline; used as the tree node label and as the lookup key for the detail view.
private String title;
// Full article body.
private String content;
// Category the article belongs to; queried with the first two characters of the tree label.
private String channel;
}
package com.murphy.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One aggregated keyword row as selected from the {@code allresult} table by
 * {@code WordMapper}.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by the Lombok annotations below. The all-args
 * constructor takes the fields in declaration order, so do not reorder them.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Word {
// The keyword itself.
private String key;
// Number of occurrences of the keyword (the count(...) column of allresult).
private int value;
// News category the count belongs to.
private String channel;
}
mapper
package com.murphy.mapper;
import com.murphy.pojo.Word;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import java.util.List;
/**
 * MyBatis mapper for the aggregated keyword table {@code allresult}.
 */
@Mapper
public interface WordMapper {
    /**
     * Returns the most frequent keywords of one channel.
     *
     * <p>Both parameters are named explicitly with {@link Param} so the
     * {@code #{channel}} and {@code #{limit}} placeholders resolve even when the
     * project is compiled without {@code -parameters}. The explicit
     * {@code ORDER BY} makes {@code LIMIT} pick the highest counts instead of
     * an unspecified subset of rows.
     *
     * @param channel channel name to filter on
     * @param limit   maximum number of rows to return
     * @return keyword rows of the channel, highest count first
     */
    @Select("SELECT * FROM allresult WHERE channel = #{channel} ORDER BY `value` DESC LIMIT #{limit}")
    List<Word> getWords(@Param("channel") String channel, @Param("limit") int limit);
}
package com.murphy.mapper;
import com.murphy.pojo.News;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Select;
import java.util.List;
/**
 * MyBatis mapper for the {@code sheet} table that holds the imported news articles.
 */
@Mapper
public interface NewsMapper {
/**
 * Selects every article whose {@code channel} column equals the given value.
 *
 * @param channel channel name to match exactly
 * @return the matching articles
 */
@Select("SELECT * FROM sheet WHERE channel = #{channel}")
List<News> getChannel(String channel);
/**
 * Selects every article whose {@code title} column equals the given value.
 * Titles are not guaranteed unique by this query, hence the list result.
 *
 * @param title article title to match exactly
 * @return the matching articles
 */
@Select("SELECT * FROM sheet WHERE title = #{title}")
List<News> getNews(String title);
}
controller
package com.murphy.controller;
import com.alibaba.fastjson.JSON;
import com.murphy.mapper.NewsMapper;
import com.murphy.mapper.WordMapper;
import com.murphy.pojo.News;
import com.murphy.pojo.Word;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * HTTP endpoints under {@code /show} used by the news browser page: article
 * titles per channel, article content by title, and keyword counts per channel.
 *
 * <p>All handlers return the response body as a plain string; list and map
 * results are serialized with fastjson.
 */
@Controller
@CrossOrigin
@RequestMapping("/show")
public class WebController {
    /** Number of leading characters of a tree label (e.g. "财经(8597)") that form the channel name. */
    private static final int CHANNEL_NAME_LENGTH = 2;
    /** Number of words returned when the {@code limit} parameter is missing or not a valid non-negative integer. */
    private static final int DEFAULT_WORD_LIMIT = 30;

    @Autowired
    NewsMapper newsMapper;
    @Autowired
    WordMapper wordMap;

    /**
     * Lists the titles of all articles of a channel.
     *
     * @param channel tree label of the channel; only its first two characters are used
     * @return JSON array of objects with a single {@code label} property (the title);
     *         an empty array when nothing matches
     */
    @RequestMapping("/channel")
    @ResponseBody
    public String getChannel(String channel) {
        List<News> newsList = newsMapper.getChannel(channelName(channel));
        List<Map<String, String>> mapList = new ArrayList<>(newsList.size());
        for (News news : newsList) {
            Map<String, String> newsMap = new HashMap<>();
            newsMap.put("label", news.getTitle());
            mapList.add(newsMap);
        }
        return JSON.toJSONString(mapList);
    }

    /**
     * Returns the content of the first article with the given title.
     *
     * @param title exact article title
     * @return the article content, or an empty string when no article has that title
     */
    @RequestMapping("/title")
    @ResponseBody
    public String getNews(String title) {
        List<News> matches = newsMapper.getNews(title);
        // An unknown title used to fail with IndexOutOfBoundsException on get(0).
        if (matches == null || matches.isEmpty()) {
            return "";
        }
        return matches.get(0).getContent();
    }

    /**
     * Returns keywords and their counts for a channel.
     *
     * @param channel tree label of the channel; only its first two characters are used
     * @param limit   number of words to return, as a decimal string
     * @return JSON object with two parallel arrays: {@code key} (the words) and
     *         {@code value} (their counts, as strings)
     */
    @RequestMapping("/words")
    @ResponseBody
    public String getWords(String channel, String limit) {
        List<Word> words = wordMap.getWords(channelName(channel), parseLimit(limit));
        List<String> keys = new ArrayList<>(words.size());
        List<String> values = new ArrayList<>(words.size());
        for (Word word : words) {
            keys.add(word.getKey());
            values.add(String.valueOf(word.getValue()));
        }
        Map<String, List<String>> map = new HashMap<>();
        map.put("key", keys);
        map.put("value", values);
        return JSON.toJSONString(map);
    }

    /** Extracts the channel name from a tree label without failing on null or short input. */
    private static String channelName(String label) {
        if (label == null) {
            return "";
        }
        return label.length() <= CHANNEL_NAME_LENGTH ? label : label.substring(0, CHANNEL_NAME_LENGTH);
    }

    /** Parses the requested word count, falling back to the default for missing, malformed or negative input. */
    private static int parseLimit(String limit) {
        if (limit == null) {
            return DEFAULT_WORD_LIMIT;
        }
        try {
            int parsed = Integer.parseInt(limit.trim());
            return parsed < 0 ? DEFAULT_WORD_LIMIT : parsed;
        } catch (NumberFormatException ignored) {
            // Deliberately ignored: a malformed limit is treated like a missing one.
            return DEFAULT_WORD_LIMIT;
        }
    }
}
页面实现
树形结构使用了element ui
词云图参考的应该是一篇叫《产品经理:你能让词云动起来吗》的博客,具体链接忘了
<template>
<div>
<!-- Two-column layout: category tree on the left, charts on the right. -->
<el-row :gutter="20">
<el-col :span="8">
<div class="grid-content bg-purple">
<div style="box-shadow: 0 2px 4px rgba(0, 0, 0, .12), 0 0 6px rgba(0, 0, 0, .04)">
<!-- Category / article tree; every click is routed to handleNodeClick. -->
<el-tree :data="data" :props="defaultProps" @node-click="handleNodeClick" />
</div>
</div>
</el-col>
<el-col :span="16">
<div class="block">
<!-- Number of words to request (30 to 1000); changes are handled by the value1 watcher. -->
<el-slider v-model="value1" :max="1000" :min="30" />
</div>
<!-- Mount point of the ECharts bar chart (looked up by id in the script). -->
<div id="EchartsMain" style="width: 100%;height: 400px" />
<!-- Container the script fills with the animated word spans (accessed via the wordCloud ref). -->
<div id="EchartsM" ref="wordCloud" class="wordCloud" />
</el-col>
</el-row>
<!-- Detail dialog showing the text held in msg; closing via the header/overlay asks for confirmation in handleClose. -->
<el-dialog
title="详情"
:visible.sync="dialogVisible"
width="30%"
:before-close="handleClose"
>
<span>{{ msg }}</span>
<span slot="footer" class="dialog-footer">
<el-button @click="dialogVisible = false">取 消</el-button>
<el-button type="primary" @click="dialogVisible = false">确 定</el-button>
</span>
</el-dialog>
</div>
</template>
<script>
import * as echarts from 'echarts'
import '/echarts-wordcloud'
// Base URL of the backend endpoints.
const API_BASE = 'http://localhost:8080/show'

/**
 * Builds the ECharts option of the keyword-frequency bar chart.
 * @param {Array} keys   words shown on the category axis
 * @param {Array} values counts, in the same order as keys
 * @returns {Object} option object for chart.setOption
 */
function buildBarOption(keys, values) {
  return {
    xAxis: {
      type: 'category',
      data: keys
    },
    yAxis: {
      type: 'value'
    },
    series: [
      {
        type: 'bar',
        data: values
      }
    ]
  }
}

/**
 * News browser: category tree, keyword bar chart and an animated word cloud
 * built from plain <span> elements that drift inside the wordCloud container.
 */
export default {
  data() {
    return {
      // Label of the article node clicked last.
      ti: '',
      // Slider value: number of words requested from the backend.
      value1: 30,
      // Article text shown in the detail dialog.
      msg: '',
      worddata: [
      ],
      dialogVisible: false,
      // Position in `data` of the category selected last (first category by default).
      selectedIndex: 0,
      // Tree content; the children are placeholders until a category is clicked
      // and its article titles are loaded.
      data: [
        {
          label: '财经(8597)',
          children: [{
            label: '二级 1-1'
          }]
        }, {
          label: '房产(200)',
          children: [{
            label: '二级 2-1'
          }, {
            label: '二级 2-2'
          }]
        }, {
          label: '教育(500)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '科技(830)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '军事(158)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '汽车(647)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '体育(1200)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '游戏(1300)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '娱乐(1200)',
          children: [{
            label: '二级 3-2'
          }]
        }, {
          label: '其他(0)',
          children: [
            {
              label: '二级 3-2'
            }
          ]
        }
      ],
      defaultProps: {
        children: 'children',
        label: 'label'
      },
      // Words currently shown in the word cloud.
      hotWord: [],
      // Palette the word colours are picked from.
      color: [
        '#a18cd1', '#fad0c4', '#ff8177',
        '#fecfef', '#fda085', '#f5576c',
        '#fe9a8b', '#30cfd0', '#38f9d7'
      ],
      // <span> elements of the word cloud, one per word.
      wordArr: [],
      // Handle of the pending animation frame.
      timer: null,
      // No longer read by the animation loop; kept so the component state keeps its shape.
      resetTime: 10,
      // Bounds of the word cloud container, see getContainerSize().
      ContainerSize: ''
    }
  },
  watch: {
    // Vue calls a watcher with (newValue, oldValue); reload the words of the
    // selected category with the new slider value.
    value1(newValue) {
      this.loadWords(this.data[this.selectedIndex].label, newValue)
    }
  },
  mounted() {
    this.init()
  },
  beforeDestroy() {
    // Lifecycle hook (must live at component level, not inside `methods`):
    // stop the animation loop when the component goes away.
    cancelAnimationFrame(this.timer)
  },
  methods: {
    formatTooltip(val) {
      return val / 100
    },
    // before-close handler of the detail dialog: ask for confirmation first.
    handleClose(done) {
      this.$confirm('确认关闭?')
        .then(_ => {
          done()
        })
        .catch(_ => {})
    },
    /**
     * Tree click handler. A node with children is a category: load its article
     * titles and its words. A node without children is an article: load its
     * text and open the detail dialog.
     */
    handleNodeClick(data) {
      if (data.children != null) {
        const position = this.data.findIndex(item => item.label === data.label)
        if (position !== -1) {
          this.selectedIndex = position
        }
        // Remember the category at click time so a late response cannot land
        // in a category selected afterwards.
        const category = this.data[this.selectedIndex]
        category.children = []
        this.$axios({
          method: 'post',
          url: API_BASE + '/channel?channel=' + encodeURIComponent(data.label)
        }).then((response) => {
          category.children = response.data
        }).catch((error) => {
          console.log(error) // request failed
        })
        // Use the slider value so chart and slider stay in sync.
        this.loadWords(data.label, this.value1)
      } else {
        this.ti = data.label
        // Do not show the previous article while the new one is loading.
        this.msg = ''
        this.dialogVisible = true
        this.$axios({
          method: 'post',
          // Titles may contain characters such as & or #, so encode them.
          url: API_BASE + '/title?title=' + encodeURIComponent(data.label)
        }).then((response) => {
          this.msg = response.data
        }).catch((error) => {
          console.log(error) // request failed
        })
      }
    },
    /**
     * Requests the words of a category and redraws bar chart and word cloud.
     * @param {string} label tree label of the category
     * @param {number} limit number of words to request
     */
    loadWords(label, limit) {
      this.$axios({
        method: 'post',
        url: API_BASE + '/words?channel=' + encodeURIComponent(label) + '&limit=' + encodeURIComponent(limit)
      }).then((response) => {
        const keys = response.data['key'] || []
        const values = response.data['value'] || []
        this.hotWord = keys
        this.renderBarChart(keys, values)
        this.init()
      }).catch((error) => {
        console.log(error) // request failed
      })
    },
    // Draws the bar chart, reusing the chart instance already bound to the element.
    renderBarChart(keys, values) {
      const chartDom = document.getElementById('EchartsMain')
      const chart = echarts.getInstanceByDom(chartDom) || echarts.init(chartDom)
      chart.setOption(buildBarOption(keys, values))
    },
    // Rebuilds the word cloud from hotWord and (re)starts the animation.
    init() {
      // Stop the previous loop first; otherwise every redraw would add another
      // loop and the words would move faster and faster.
      cancelAnimationFrame(this.timer)
      this.$refs.wordCloud.innerHTML = ''
      this.dealSpan()
      this.initWordPos()
      this.render()
    },
    // Creates one <span> per word with random colour, size and velocity.
    dealSpan() {
      const wordArr = []
      this.hotWord.forEach((value) => {
        const spanDom = document.createElement('span')
        spanDom.style.position = 'relative'
        spanDom.style.display = 'inline-block'
        spanDom.style.color = this.randomColor()
        spanDom.style.fontSize = this.randomNumber(15, 30) + 'px'
        // Words are data, not markup.
        spanDom.textContent = value
        spanDom.local = {
          position: {
            // current offset from the laid-out position
            x: 0,
            y: 0
          },
          direction: {
            // 1 keeps the velocity sign, -1 reverses it
            x: 1,
            y: 1
          },
          velocity: {
            // displacement per frame, in [-0.5, 0.5)
            x: -0.5 + Math.random(),
            y: -0.5 + Math.random()
          }
        }
        this.$refs.wordCloud.appendChild(spanDom)
        wordArr.push(spanDom)
      })
      this.wordArr = wordArr
    },
    // Picks a random colour from the palette.
    randomColor() {
      var colorIndex = Math.floor(this.color.length * Math.random())
      return this.color[colorIndex]
    },
    // Random integer between lowerInteger and upperInteger, both inclusive.
    randomNumber(lowerInteger, upperInteger) {
      const choices = upperInteger - lowerInteger + 1
      return Math.floor(Math.random() * choices + lowerInteger)
    },
    // One animation step: schedule the next frame, then move the words.
    render() {
      this.timer = requestAnimationFrame(this.render)
      this.wordFly()
    },
    // Moves every word by its velocity and reverses direction at the container edges.
    wordFly() {
      this.wordArr.forEach((value) => {
        const local = value.local
        if (local.realPos.minx + local.position.x < this.ContainerSize.leftPos.x || local.realPos.maxx + local.position.x > this.ContainerSize.rightPos.x) {
          local.direction.x = -local.direction.x
        }
        if (local.realPos.miny + local.position.y < this.ContainerSize.leftPos.y || local.realPos.maxy + local.position.y > this.ContainerSize.rightPos.y) {
          local.direction.y = -local.direction.y
        }
        local.position.x += local.velocity.x * local.direction.x
        local.position.y += local.velocity.y * local.direction.y
        value.style.transform = 'translateX(' + local.position.x + 'px) translateY(' + local.position.y + 'px)'
      })
    },
    // Records the laid-out box of every word and the bounds of the container.
    initWordPos() {
      this.wordArr.forEach((value) => {
        value.local.realPos = {
          minx: value.offsetLeft,
          maxx: value.offsetLeft + value.offsetWidth,
          miny: value.offsetTop,
          maxy: value.offsetTop + value.offsetHeight
        }
      })
      this.ContainerSize = this.getContainerSize()
    },
    // Returns the top-left and bottom-right corner of the word cloud container.
    getContainerSize() {
      const el = this.$refs.wordCloud
      return {
        leftPos: {
          // left and top edge
          x: el.offsetLeft,
          y: el.offsetTop
        },
        rightPos: {
          // right and bottom edge
          x: el.offsetLeft + el.offsetWidth,
          y: el.offsetTop + el.offsetHeight
        }
      }
    }
  }
}
</script>
<style scoped>
/* Container of the animated word cloud: full width, one viewport high. */
.wordCloud{
width:100%;
height:100vh;
}
</style>