# Python 批量打开数据 (batch-open data files)

import xlrd
import xlwt
from datetime import date,datetime
import os

from sklearn.cluster import KMeans
import collections
import pandas
import numpy
import re





def opp(ggv):
    """Read one column of values from an Excel workbook.

    Opens the workbook at path ``ggv`` with xlrd, looks up the worksheet by
    its hard-coded name and returns the values of column index 7 with the
    first row dropped (presumably a header row -- confirm against the data
    files).

    Args:
        ggv: Path of the workbook to open.

    Returns:
        A list with the cell values of column index 7, excluding row 0.
    """
    book = xlrd.open_workbook(ggv)
    sheet = book.sheet_by_name(u'准确概率计算逻辑-存在竞品对比部分')
    column = sheet.col_values(7)
    return column[1:]

#print(opp(ggv))
#fopen = open('file', 'r')




def k_means(pp):
    """Return the centre of the most populated of three k-means clusters.

    The values in ``pp`` are clustered in one dimension into three groups
    with ``KMeans`` and the centroid of the group that holds the most samples
    is returned, truncated to ``int``.

    Args:
        pp: Iterable of hashable numeric values (distinct values are counted
            with a ``set`` before clustering).

    Returns:
        The ``int``-truncated centroid of the largest cluster, or ``0`` when
        ``pp`` contains three or fewer distinct values.
    """
    pv = list(pp)
    # Guard: clustering is only attempted with more than three distinct values.
    if len(set(pv)) <= 3:
        return 0

    gf = numpy.array([pv]).T  # column vector: one sample per row
    estimator = KMeans(n_clusters=3)  # build the clusterer
    estimator.fit(gf)  # run the clustering

    centroids = estimator.cluster_centers_
    # Count members per cluster *label*. The counts must be indexed by label
    # so that they line up with the rows of cluster_centers_; counting in
    # order of first appearance in labels_ (as a Counter does) can select the
    # centre of the wrong cluster.
    sizes = numpy.bincount(estimator.labels_, minlength=len(centroids))
    index_max = int(sizes.argmax())

    center = centroids[index_max][0]
    return int(center)
"""
changzhou=pandas.read_excel('cz.xlsx')

data=changzhou['房价网均价']
"""
# List every file and directory in the current working directory; each entry
# is treated as an input workbook.
lis = os.listdir()
ll = pandas.Series(lis)
# Skip this script and the report written at the end of the run. Without
# excluding 'bbcc.xls', a second run in the same directory would hand the
# report to opp(), which fails because the expected sheet is missing.
lld = ll[~ll.isin(['jjj.py', 'bbcc.xls'])]

bb = []

for i in lld:
    ggv = i
    ddf = opp(ggv)
    out = k_means(ddf)
    # Remove ASCII letters, digits and a few punctuation marks from the file
    # name; what remains is used as the city label. The raw string keeps the
    # pattern identical while avoiding invalid-escape-sequence warnings.
    # NOTE(review): the ASCII '.' is not in the character class, so the dot
    # before the file extension stays in the label -- confirm this is intended.
    st = re.sub(r"[A-Za-z0-9\!\%\[\]\,\。]", "", i)
    aa = [st, out]
    bb.append(aa)
    print(aa)

bbcc = pandas.DataFrame(bb, columns=['city', 'price'])

# k_means() returns 0 when it did not cluster; drop those rows.
bbcc = bbcc[bbcc['price'] > 0]

bbcc.to_excel('bbcc.xls')
# posted @ 2022-08-19 22:59  luoganttcc  阅读(24)  评论(0)  编辑  收藏  举报