# Order the dictionary's keys by their associated values, largest value first.
dic_class = {'4': 3, '5': 5, '1': 1}
dic_class_sorted = [
    name
    for name, _count in sorted(
        dic_class.items(), key=lambda pair: pair[1], reverse=True
    )
]
print(dic_class_sorted)
输出:
['5', '4', '1']
sorted 函数返回的是由字典的键组成的列表:key=dic_class.__getitem__ 表示按每个键对应的值排序,reverse=True 表示从大到小排列。
# Print the (key, value) pairs ordered by key, then look up a single key.
# Dictionary keys are unique, so walking the sorted keys yields the same
# list as sorting the items on their first element.
dic = {0: 2, 4: 2, 3: 5}
pairs_by_key = [(k, dic[k]) for k in sorted(dic)]
print(pairs_by_key)
print(dic[0])  # value stored under key 0
结果:
[(0, 2), (3, 5), (4, 2)]
2
2.sklearn中出现的警告信息处理
import warnings

# Silence DeprecationWarning raised from scikit-learn modules.
# `module` is a regular expression matched against the start of the module
# name (not a shell glob), so anchor it to the package name followed by either
# a dot or the end of the string.
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'sklearn(\.|$)',
)
或者
# One import statement per line (the two were fused together, which is a
# syntax error). Newer scikit-learn releases expose `ignore_warnings` from the
# private `sklearn.utils._testing` module; fall back to the legacy
# `sklearn.utils.testing` location for older installations.
try:
    from sklearn.utils._testing import ignore_warnings
except ImportError:
    from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
在需要屏蔽该警告的函数定义前加上装饰器:
@ignore_warnings(category=ConvergenceWarning)
3.保存xgboost的分类模型
# Train an XGBoost classifier and persist the fitted model with pickle.
gbdt_model = XGBClassifier(n_estimators=50, max_depth=9, learning_rate=0.1, subsample=1, colsample_bytree=1)
gbdt_model = gbdt_model.fit(train_data, train_labels)
# Save the model; the `with` block closes the file even if pickling fails.
with open("qiye_city_model.dat", "wb") as model_file:
    pickle.dump(gbdt_model, model_file)
# Load the model back (only unpickle files that come from a trusted source):
# with open("qiye_city_model.dat", "rb") as model_file:
#     gbdt_model = pickle.load(model_file)
4. 读取excel数据
# Read the training spreadsheet and split it into a feature array and labels.
# Forward slashes are used throughout the path: a backslash is only a path
# separator on Windows, while '/' is accepted on every platform.
dataset = pd.read_excel('./train/train.xlsx')
data = dataset.iloc[:, 3:]  # all rows, columns from position 3 (the fourth column) onward
data = np.array(data)
data = np.round(data, decimals=5)  # round to 5 decimal places
label = dataset['label']  # the label column
label = np.array(label)
5.保存到excel
import pandas as pd

# Write `train_fake` to an Excel sheet without the row index.
train_fake = np.array(train_fake)
df = pd.DataFrame(data=train_fake)
# The context manager saves and closes the workbook on exit, replacing the
# explicit `writer.save()` call that newer pandas releases no longer provide.
# Forward slashes keep the path valid on every platform.
with pd.ExcelWriter('./train/train.xlsx') as writer:
    df.to_excel(writer, index=False)
另一种简单方式:
def txt2excel(txt_path='./test.txt', xlsx_path='./test.xlsx'):
    """Convert a comma-separated numeric text file into an Excel workbook.

    Args:
        txt_path: Text file to read; values are parsed as floats separated
            by commas. Defaults to ``'./test.txt'``.
        xlsx_path: Excel file to create. Defaults to ``'./test.xlsx'``.
    """
    data = np.loadtxt(txt_path, delimiter=',', dtype=float)
    df = pd.DataFrame(data)
    # The context manager saves and closes the workbook on exit.
    with pd.ExcelWriter(xlsx_path) as writer:
        # Write the values only: no column header row and no row index.
        df.to_excel(writer, header=False, index=False)
6.sklearn中的onehot编码
参考链接
https://www.cnblogs.com/zhoukui/p/9159909.html