# zhuge
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 1 18:08:06 2017
@author: Administrator
"""
def popo(dmi, columns):
    """Summarise the rows of ``dmi`` that share the most common ``columns`` value.

    The rows whose ``dmi[columns]`` equals the most frequent value are
    selected, passed through ``lower_dimention`` and summarised with
    ``k_means`` (both helpers are defined elsewhere; their exact semantics
    are not visible from this function).

    Args:
        dmi: Table-like object supporting ``dmi[column]`` and boolean-mask
            row selection (presumably a pandas DataFrame -- TODO confirm).
        columns: Name of the single column used both for grouping and for
            the second ``k_means`` summary.

    Returns:
        ``[ww1, ww2, weight]`` where ``ww1`` is ``k_means`` of the reduced
        data rounded to 2 places, ``ww2`` is ``k_means`` of the selected
        column rounded to 2 places, and ``weight`` is ``10`` when the
        relative spread ``(max - min) / mean`` of the reduced data is below
        ``0.1`` and it holds more than one entry, otherwise the number of
        entries. ``None`` when no value occurs at least twice.
    """
    most_area = Counter(dmi[columns]).most_common(1)
    # ``most_common`` yields [] for an empty column; treat that the same as
    # "no value is shared by two or more rows" instead of raising IndexError.
    if not most_area or most_area[0][1] < 2:
        return None

    dmi2 = dmi[dmi[columns] == most_area[0][0]]
    print('dmi=', dmi)
    dmiy = lower_dimention(dmi2)
    ratio2 = (dmiy.max() - dmiy.min()) / dmiy.mean()
    ww1 = round(k_means(dmiy), 2)
    ww2 = round(k_means(dmi2[columns]), 2)
    print('dmi2=', dmi2)

    # A tight cluster gets the fixed weight 10; otherwise the weight is the
    # number of entries that were summarised.
    if ratio2 < 0.1 and len(dmiy) > 1:
        return [ww1, ww2, 10]
    return [ww1, ww2, len(dmiy)]
# 3x3 object array pre-filled with None; the first row is then replaced by
# the result of summarising the module-level ``dmi2`` by its 'room' column.
dd = numpy.full((3, 3), None, dtype=object)
dd[0] = popo(dmi2, 'room')
def _summary_row(frame):
    """Return ``([value, area, weight], is_tight)`` for one subset of rows.

    ``value`` and ``area`` are ``k_means`` of the reduced data and of
    ``frame['area']``, each rounded to 2 places. ``is_tight`` is true when
    the relative spread ``(max - min) / mean`` of the reduced data is below
    ``0.1`` and it holds more than one entry; ``weight`` is then ``10``,
    otherwise the number of entries.
    """
    reduced = lower_dimention(frame)
    spread = (reduced.max() - reduced.min()) / reduced.mean()
    value = round(k_means(reduced), 2)
    area = round(k_means(frame['area']), 2)
    is_tight = bool(spread < 0.1 and len(reduced) > 1)
    weight = 10 if is_tight else len(reduced)
    return [value, area, weight], is_tight


def filter_data(dmi):
    """Summarise ``dmi`` at three successively narrower filter levels.

    Row 0 of the result covers all of ``dmi``; row 1 covers the rows that
    share the most common ``'area'`` value; row 2 covers those of them that
    also share the most common ``'floor'`` value. A level is only computed
    when its most common value occurs at least twice; otherwise that row
    (and any later one) stays ``[None, None, None]``.

    NOTE(review): the original indentation of this function was lost. This
    version assumes the area and floor levels are evaluated regardless of
    whether the previous level was "tight" -- confirm against the intended
    behaviour.

    Args:
        dmi: Table-like object with ``'area'`` and ``'floor'`` columns that
            supports boolean-mask row selection (presumably a pandas
            DataFrame -- TODO confirm).

    Returns:
        A 3x3 numpy object array; each filled row is
        ``[value, area, weight]`` as produced by ``_summary_row``.
    """
    dd = numpy.array([[None, None, None], [None, None, None], [None, None, None]])

    # Level 0: every row of the input.
    row, tight = _summary_row(dmi)
    print('x' * 100)
    print(dmi)
    dd[0] = row
    if tight:
        print('room_filter', dd)

    # Level 1: rows sharing the most common area.
    most_area = Counter(dmi['area']).most_common(1)
    # ``most_common`` yields [] for an empty column; stop instead of raising.
    if not most_area or most_area[0][1] < 2:
        return dd
    dmi2 = dmi[dmi['area'] == most_area[0][0]]
    print('dmi=', dmi)
    row, tight = _summary_row(dmi2)
    print('dmi2=', dmi2)
    dd[1] = row
    if tight:
        print('area_filter', dd)
    print('****************************************' * 10)

    # Level 2: of those, rows sharing the most common floor.
    most_floor = Counter(dmi2['floor']).most_common(1)
    if not most_floor or most_floor[0][1] < 2:
        return dd
    dmi3 = dmi2[dmi2['floor'] == most_floor[0][0]]
    print('dmi3=', dmi3)
    row, tight = _summary_row(dmi3)
    dd[2] = row
    if tight:
        print(dd)
    return dd
# Apply filter_data to ``dmi2`` and keep the returned array in ``ddr``.
# ``dmi2`` is not defined in this section; it is expected to exist at module
# level by the time this line runs.
ddr=filter_data(dmi2)