嵌入式SQL与数据库连接

python项目代码

点击查看代码
# -*- coding: utf-8 -*-
import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pymysql
import wordcloud as wc



# Open the MySQL connection and cursor used by every section of this script.
# NOTE(review): the host and credentials are hard-coded; load them from
# configuration or environment variables before sharing or deploying this.
db = pymysql.connect(
    host='120.46.140.212',
    user="root",
    password="**********",  # primary keyword; `passwd` is a compatibility alias
    port=3306,
    database="keywords",  # schema name; `db` is a compatibility alias
    # The passage_001 table is declared as utf8mb4, so the connection uses the
    # same charset; MySQL's plain `utf8` cannot carry 4-byte characters.
    charset='utf8mb4',
)
cursor = db.cursor()
####################################################
# # Section 1 (disabled): create the `passage_001` table in the `keywords`
# # schema with columns id, word and num (id is the primary key).
# sql_op1 = '''
#             CREATE TABLE `keywords`.`passage_001` (
#             `id` INT(3) UNSIGNED NOT NULL,
#             `word` VARCHAR(20) NOT NULL,
#             `num` INT(3) UNSIGNED NOT NULL,
#             PRIMARY KEY (`id`)
#         )	ENGINE = InnoDB
#             DEFAULT CHARACTER SET = utf8mb4
#             COLLATE = utf8mb4_general_ci;
#             '''
# cursor.execute(sql_op1)  # run the CREATE TABLE statement
# ##########################################
# # Section 2 (disabled): read result.txt, count how often each word occurs
# # and insert the words that occur at least 9 times into passage_001.
# txt = open("result.txt", "r", encoding='utf-8').read()
# words = jieba.lcut(txt)  # segment the text with jieba in accurate mode
# counts = {}  # maps each word to the number of times it occurs
# c = 0
# for word in words:
#     c = c + 1
#     if len(word) == 1:  # single-character tokens are not counted
#         continue
#     else:
#         counts[word] = counts.get(word, 0) + 1  # add 1 each time the word is seen
#
# items = list(counts.items())
# items.sort(key=lambda x: x[1], reverse=True)  # sort by occurrence count, highest first
#
# # NOTE(review): c counts every token, so c + 1 can exceed len(items). If no
# # entry with count < 9 is reached first (or items is empty), items[i] raises
# # IndexError. Iterating over enumerate(items) would avoid this.
# for i in range(c + 1):
#     word, count = items[i]
#     if count < 9:  # only words occurring at least 9 times are stored
#         break
#     # NOTE(review): the statement is built with % string formatting, so a word
#     # containing a quote character breaks the SQL. Passing the values as the
#     # second argument of cursor.execute() lets the driver escape them.
#     sql_op2 = "insert into passage_001 values (%s,'%s',%s);" % (i, word, count)
#     # the values are interpolated into the statement above
#     cursor.execute(sql_op2)  # run the INSERT
#     db.commit()
##############################################
# # Section 3 (disabled): read the (word, num) rows back from passage_001 and
# # render them as a word cloud that is displayed and saved to photo0.jpg.
# sql_op3 = "select word,num from passage_001"
# cursor.execute(sql_op3)
# list1 = []
# list2 = []
# alldata = cursor.fetchall()
# print(alldata)
# # Split the rows into a list of words and a list of counts.
# for i in alldata:
#     list1.append(i[0])
#     list2.append(i[1])
# # Build the word -> count mapping handed to the word cloud below.
# dic = dict(zip(list1, list2))
# print(dic)
# # The image 2.jpg is converted to an array and passed as the cloud's mask.
# img = Image.open('2.jpg')
# img_array = np.array(img)
# wcld = wc.WordCloud(font_path='FZXBSK.TTF',
#                     mask=img_array,
#                     max_words=200,
#                     contour_color='white',
#                     width=1000,
#                     height=1000,
#                     margin=2,
#                     colormap="YlOrRd"
#                     )
# wcld.generate_from_frequencies(dic)
# # Show the cloud without axes, then write it to disk.
# plt.imshow(wcld)
# plt.axis('off')
# plt.show()
# wcld.to_file("photo0.jpg")

##############################################

# Close the cursor first, then the connection it was created from.
for handle in (cursor, db):
    handle.close()

没有被注释的部分是公共部分,即连接数据库和关闭连接的操作。被注释的代码由一连串 # 号组成的分割线分成三部分:第一部分是建表操作;第二部分是读取爬取到的文本(result.txt)、分词并统计词频,再将结果插入数据库;第三部分是从数据库中取出数据,并据此生成词云图。
该代码不能直接复制到本地执行!
这一点小白请注意,直接复制到本地执行根本不能得到结果!
该代码仅作思路展示!

posted @ 2021-12-18 12:44  倔强jujiang  阅读(138)  评论(0)  编辑  收藏  举报