隐藏页面特效

新闻分类-中文分词+词云展示(1)

1、导入数据,并创建用于存储关键词的表(此处使用MySQL)

 

2、使用jieba进行分词统计并存储到表中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<code-pre class="code-pre" id="pre-eYyMGm"><code-line class="line-numbers-rows"></code-line># -*- coding: utf-8 -*-  
<code-line class="line-numbers-rows"></code-line>import pandas as pd
<code-line class="line-numbers-rows"></code-line>import pymysql
<code-line class="line-numbers-rows"></code-line>import jieba<br>#此处只是其中一个表的分词
def _open_connection():
    """Open a new connection to the local ``test1125`` MySQL database."""
    return pymysql.connect(host="127.0.0.1", database="test1125", user="root", password="", port=3306, charset='utf8')


def getdata():
    """Segment article contents with jieba and tally each keyword in the database.

    Reads the ``content`` column of up to 80 rows from ``sheet_car``, prints
    the raw values, then cuts every row into tokens with ``jieba.cut``.
    For each token accepted by ``checkword``:

    * if ``checkre`` reports the keyword is not stored yet, it is inserted
      through ``save_keywords`` and printed;
    * otherwise its counter is incremented through ``updatenum``.

    Tokens rejected by ``checkword`` only produce the "未知词语" message.
    """
    dbconn = _open_connection()
    try:
        # Load the query result into a DataFrame via pandas.
        titles = pd.read_sql("select content from sheet_car limit 80", dbconn)
    finally:
        # The read connection is only needed for this one query; release it
        # instead of leaking it for the lifetime of the process.
        dbconn.close()

    print(titles.values)
    for row in titles.values:
        text = ",".join(row)
        for word in jieba.cut(text):
            if not checkword(word):
                print("未知词语")
                continue
            # checkre / save_keywords / updatenum each close the connection
            # they are handed, so every call must receive a fresh one.
            if checkre(_open_connection(), word):
                save_keywords(_open_connection(), word)
                print(word)
            else:
                updatenum(_open_connection(), word)
def checkword(word):
    """Tell whether *word* should be kept as a keyword.

    The word is lower-cased and compared against a fixed list of
    punctuation marks and symbols.

    Returns:
        False when the lower-cased word is one of the listed tokens,
        True otherwise.
    """
    excluded_tokens = [',', '.', ',', '。', ':', '“', '”', '"', '?', '?', '《', '》', '(', '{', ')', '}', '!', '%', '℃', '¥', '#']
    return word.lower() not in excluded_tokens
<code-line class="line-numbers-rows"></code-line>
<code-line class="line-numbers-rows"></code-line>
def save_keywords(db, keyword):
    """Insert *keyword* into ``key_sheet_car`` with an initial count of 1.

    Args:
        db: An open database connection exposing ``cursor()``, ``commit()``,
            ``rollback()`` and ``close()``. It is closed before this function
            returns, whether or not the insert succeeded.
        keyword: The keyword text to store.

    Errors raised while executing or committing are not propagated: a
    failure message is printed and the transaction is rolled back.
    """
    # Obtain a cursor for the statement.
    cursor = db.cursor()
    # Pass the keyword as a bound parameter so the driver quotes it; building
    # the statement with "%" broke on tokens containing a quote character and
    # allowed SQL injection.
    sql = "INSERT INTO key_sheet_car(keywords,num) VALUES (%s,1)"
    try:
        cursor.execute(sql, (keyword,))
        print("true")
        db.commit()
    except Exception:
        print("数据插入失败")
        # Undo the partial work on failure.
        db.rollback()
    finally:
        # Always release the connection, even if the rollback itself fails.
        db.close()
<code-line class="line-numbers-rows"></code-line>
def updatenum(db, keyword):
    """Increment the ``num`` counter of *keyword* in ``key_sheet_car``.

    Args:
        db: An open database connection exposing ``cursor()``, ``commit()``,
            ``rollback()`` and ``close()``. It is closed before this function
            returns, whether or not the update succeeded.
        keyword: The keyword whose counter is incremented.

    Errors raised while executing or committing are not propagated: a
    failure message is printed and the transaction is rolled back.
    """
    # Obtain a cursor for the statement.
    cursor = db.cursor()
    # Bind the keyword as a parameter instead of formatting it into the SQL
    # text, so quotes in the keyword cannot break or alter the statement.
    sql = "update key_sheet_car set num=num+1 where keywords = %s"
    try:
        cursor.execute(sql, (keyword,))
        db.commit()
    except Exception:
        print("数据更新失败")
        # Undo the partial work on failure.
        db.rollback()
    finally:
        # Always release the connection, even if the rollback itself fails.
        db.close()
<code-line class="line-numbers-rows"></code-line>
def checkre(db, keyword):
    """Check whether *keyword* still needs to be inserted into ``key_sheet_car``.

    Args:
        db: An open database connection exposing ``cursor()``, ``rollback()``
            and ``close()``. It is closed before this function returns.
        keyword: The keyword to look up.

    Returns:
        True when no row with this keyword was found (including when the
        query failed, in which case a failure message is printed), False
        when at least one matching row exists.
    """
    # Obtain a cursor for the lookup.
    cursor = db.cursor()
    rows = []
    # Bind the keyword as a parameter instead of formatting it into the SQL
    # text, so quotes in the keyword cannot break or alter the query.
    sql = "select keywords from key_sheet_car where keywords = %s"
    try:
        cursor.execute(sql, (keyword,))
        # A read-only SELECT has nothing to commit, so no commit is issued.
        rows = list(cursor.fetchall())
    except Exception:
        print("查询数据失败")
        db.rollback()
    finally:
        # Always release the connection, even if the rollback itself fails.
        db.close()
    return not rows
<code-line class="line-numbers-rows"></code-line>
<code-line class="line-numbers-rows"></code-line>
# Script entry point: run the keyword extraction only when executed directly.
if __name__ == "__main__":
    getdata()
<code-line class="line-numbers-rows"></code-line>
</code-pre>

 

3、项目结构

  

 

 

 

 


__EOF__

本文作者:往心。
本文链接:https://www.cnblogs.com/lx06/p/15650851.html
关于博主:评论和私信会在第一时间回复。或者直接私信我。
版权声明:本博客转载请注明出处!
声援博主:如果您觉得文章对您有帮助,可以点击文章右下角推荐一下。您的鼓励是博主的最大动力!
posted @   往心。  阅读(77)  评论(0)  编辑  收藏  举报
编辑推荐:
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
· AI与.NET技术实操系列(六):基于图像分类模型对图像进行分类
点击右上角即可分享
微信分享提示