python-DTW
# note: xlrd 2.0+ dropped .xlsx support, so xlrd < 2.0 (e.g. 1.2.0) is needed here
import xlrd
import numpy as np
import matplotlib.pyplot as plt
# dtaidistance is the preferred DTW library
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis
# normalization
from sklearn.preprocessing import MinMaxScaler
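# Quick, self-contained sanity check (not part of the original workflow): a
# minimal sketch of the dtaidistance calls this script is built around, using
# two made-up toy series. dtw.distance returns the DTW distance (smaller means
# more similar shape) and dtw.warping_path returns the optimal alignment.
toy_a = np.array([0.0, 0.1, 0.1, 0.2, 0.8, 1.0, 0.6], dtype=np.double)
toy_b = np.array([0.0, 0.0, 0.1, 0.3, 0.9, 0.9, 0.5, 0.4], dtype=np.double)
toy_distance = dtw.distance(toy_a, toy_b)
toy_path = dtw.warping_path(toy_a, toy_b)  # list of (index_a, index_b) pairs
dtwvis.plot_warping(toy_a, toy_b, toy_path, filename="toy_warping.png")
print("toy DTW distance:", toy_distance)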
# reference (standard) sleeping-beauty papers
wb = xlrd.open_workbook("ex.xlsx")
# data to be tested (candidate papers)
'''
"1990-1999.xlsx" -> wb_1990_1999
wb_1990_1999.sheet_by_index(0) --> 1990
wb_1990_1999.sheet_by_index(1) --> 1991
...
wb_1990_1999.sheet_by_index(9) --> 1999
"2000-2010.xlsx" -> wb_2000_2010
wb_2000_2010.sheet_by_index(0) --> 2000
...
wb_2000_2010.sheet_by_index(10) --> 2010
'''
wb_1990_1999 = xlrd.open_workbook("1990-1999.xlsx")
wb_2000_2010 = xlrd.open_workbook("2000-2010.xlsx")
sh = wb.sheet_by_index(0)
# first sheet of the first candidate workbook: 1990
#sh_1990 = wb_1990_1999.sheet_by_index(0)
# first sheet of the second candidate workbook: 2000
#sh_2000 = wb_2000_2010.sheet_by_index(0)
# read all sheets in batch instead
names = locals()
for i in range(0, 10):
    names["sh_" + str(1990 + i)] = wb_1990_1999.sheet_by_index(i)
for i in range(0, 11):
    names["sh_" + str(2000 + i)] = wb_2000_2010.sheet_by_index(i)
Data preprocessing
# reference sleeping-beauty papers
# table holds the per-year citation counts, one row per paper
table = []
for row in range(0, sh.nrows):
    # Y holds this paper's yearly citation counts
    Y = sh.row_values(row)[0:]
    table.append(Y)
print(table)
names = locals()
for i in range(0, 21):
    # create an empty list for each year
    names["table_" + str(1990 + i)] = []
    # read the corresponding sheet, one paper (row) at a time
    for row in range(0, names["sh_" + str(1990 + i)].nrows):
        # Y holds this paper's yearly citation counts
        Y = names["sh_" + str(1990 + i)].row_values(row)[0:]
        names["table_" + str(1990 + i)].append(Y)
    print(str(1990 + i) + " ok!")
# normalization
scaler = MinMaxScaler()
# normalize the ten reference sleeping-beauty series
s1 = []
s1.append(scaler.fit_transform(np.array(table[0][:103], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[1][:60], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[2][:116], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[3][:115], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[4][:82], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[5][:86], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[6][:58], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[7][:85], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[8][:75], dtype=np.double).reshape(-1, 1)))
s1.append(scaler.fit_transform(np.array(table[9][:102], dtype=np.double).reshape(-1, 1)))
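# Equivalent, more compact form of the ten calls above (the truncation lengths
# are the same ones used above, just collected in a list):
ref_lengths = [103, 60, 116, 115, 82, 86, 58, 85, 75, 102]
s1_alt = [scaler.fit_transform(np.array(table[k][:n], dtype=np.double).reshape(-1, 1))
          for k, n in enumerate(ref_lengths)]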
# normalize the candidate data
names = locals()
for i in range(0, 21):
    # create an empty list per year for the normalized series
    names["s2_" + str(1990 + i)] = []
    # number of papers (rows) for this year
    names["num_table_" + str(1990 + i)] = len(names["table_" + str(1990 + i)])
    # normalize and store each paper's series
    for j in range(0, names["num_table_" + str(1990 + i)]):
        names["s2_" + str(1990 + i)].append(scaler.fit_transform(np.array(names["table_" + str(1990 + i)][j], dtype=np.double).reshape(-1, 1)))
    print(str(1990 + i) + " over")