嵌入式SQL与数据库连接
Python 项目代码
点击查看代码
# -*- coding: utf-8 -*-
import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pymysql
import wordcloud as wc
# Open the MySQL connection and cursor shared by every section of this script.
# NOTE(review): the host, the root account and the password are hard-coded;
# load them from configuration or the environment before using this anywhere
# beyond a demo.
db = pymysql.connect(
    host='120.46.140.212',
    user="root",
    password="**********",  # canonical keyword; `passwd` is only a MySQLdb-compatibility alias
    port=3306,
    database="keywords",  # database name; `db` is only a MySQLdb-compatibility alias
    charset='utf8mb4',  # same charset as the passage_001 table; MySQL's 'utf8' is 3-byte utf8mb3
)
cursor = db.cursor()
####################################################
# Section 1 (commented out): create the `passage_001` table in the `keywords`
# database with columns id, word and num. Uncomment this section to run it.
# sql_op1 = '''
# CREATE TABLE `keywords`.`passage_001` (
# `id` INT(3) UNSIGNED NOT NULL,
# `word` VARCHAR(20) NOT NULL,
# `num` INT(3) UNSIGNED NOT NULL,
# PRIMARY KEY (`id`)
# ) ENGINE = InnoDB
# DEFAULT CHARACTER SET = utf8mb4
# COLLATE = utf8mb4_general_ci;
# '''
# cursor.execute(sql_op1)  # run the CREATE TABLE statement
# ##########################################
# Section 2 (commented out): read result.txt, segment it with jieba, count the
# multi-character words, and insert every word that occurs at least 9 times
# into passage_001 as (rank index, word, count).
# txt = open("result.txt", "r", encoding='utf-8').read()
# words = jieba.lcut(txt)  # segment the text using jieba's precise mode
# counts = {}  # maps each word to its number of occurrences
# c = 0
# for word in words:
#     c = c + 1
#     if len(word) == 1:  # single-character tokens are not counted
#         continue
#     else:
#         counts[word] = counts.get(word, 0) + 1  # add 1 each time the word appears
#
# items = list(counts.items())
# items.sort(key=lambda x: x[1], reverse=True)  # sort by count, highest first
#
# NOTE(review): c counts every token, so range(c + 1) is always longer than
# items. If no word has count < 9 to trigger the break (or items is empty),
# items[i] raises IndexError. `for i, (word, count) in enumerate(items):`
# avoids this.
# NOTE(review): the INSERT is built with % string formatting, so a word that
# contains a quote breaks the statement. Prefer passing the values separately:
# cursor.execute("insert into passage_001 values (%s, %s, %s)", (i, word, count))
# for i in range(c + 1):
#     word, count = items[i]
#     if count < 9:  # only keep words that occur at least 9 times
#         break
#     sql_op2 = "insert into passage_001 values (%s,'%s',%s);" % (i, word, count)
#     # pass in the values
#     cursor.execute(sql_op2)  # run the INSERT
# NOTE(review): indentation was lost in the original paste; the commit is
# assumed to run once, after the loop.
# db.commit()
##############################################
# Section 3 (commented out): read the (word, num) rows back from passage_001,
# build a word -> frequency dict, render it as a word cloud using 2.jpg as the
# mask, show it with matplotlib, and save it to photo0.jpg.
# sql_op3 = "select word,num from passage_001"
# cursor.execute(sql_op3)
# list1 = []
# list2 = []
# alldata = cursor.fetchall()
# print(alldata)
# for i in alldata:
#     list1.append(i[0])  # first selected column: word
#     list2.append(i[1])  # second selected column: num
# dic = dict(zip(list1, list2))  # word -> num mapping fed to the word cloud
# print(dic)
# img = Image.open('2.jpg')
# img_array = np.array(img)  # the image as an array, passed as the cloud mask
# wcld = wc.WordCloud(font_path='FZXBSK.TTF',
#                     mask=img_array,
#                     max_words=200,
#                     contour_color='white',
#                     width=1000,
#                     height=1000,
#                     margin=2,
#                     colormap="YlOrRd"
#                     )
# wcld.generate_from_frequencies(dic)
# plt.imshow(wcld)
# plt.axis('off')
# plt.show()
# wcld.to_file("photo0.jpg")
##############################################
# Release the database resources in order: the cursor first, then the connection.
for handle in (cursor, db):
    handle.close()
未被注释的部分是公共部分,负责连接数据库(以及最后关闭游标和连接)。被注释的代码以一连串的 # 号作为分割线,分成 3 部分:第一部分是连接数据库后的建表操作;第二部分是读取爬取到的文本(result.txt),用 jieba 分词并统计词频,再将结果插入数据库;第三部分是从数据库中取出数据,并用取出的内容生成词云图。运行时按需取消对应部分的注释。
该代码不能直接复制到本地执行!
这一点请初学者注意:运行前需要有可访问的 MySQL 数据库,以及 result.txt、2.jpg、FZXBSK.TTF 等本地文件,并取消相应代码段的注释;直接复制到本地执行根本不能得到结果!
该代码仅作思路展示!