1、导包
import pandas as pd
from pyecharts.charts import Bar, Pie, Map, Line, Scatter
from pyecharts import options as opts
from pyecharts.render import make_snapshot
from snapshot_selenium import snapshot
from pyecharts.globals import CurrentConfig
2、读取数据
# Load the second-hand housing spreadsheet into the DataFrame that every
# later step of this script reads from.
df = pd.read_excel(
    io="./xlsx/二手房数据.xlsx",
)
3、数据清洗
# Inspect the raw data: first rows, summary statistics, and the number of
# missing values per column.
print(df.head())
print(df.describe())
print(df.isnull().sum())

# Replace missing elevator values with the placeholder "未知".
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on df["电梯"]: the in-place form returns None (so
# printing it only ever showed "None"), and as a chained in-place operation
# it is deprecated in pandas and does not update df under copy-on-write.
df["电梯"] = df["电梯"].fillna("未知")

# Print the per-column missing-value counts again to confirm the fill.
print(df.isnull().sum())
4、数据可视化
可视化展示-北京各城区二手房数量地图分布
# Count listings per district: group by the "市区" column and count the
# "小区" entries in each group.
nums = df.groupby("市区")["小区"].agg("count")
citys = nums.index.tolist()
# Append "区" to every district name for use as map region names
# (presumably so they match the region names of the "北京" map - confirm).
city = [i + "区" for i in citys]
xqnum = nums.values.tolist()

# The chart is bound to `bj_map` rather than `map` so the builtin map() is
# not shadowed for the rest of the module.
bj_map = (
    Map(init_opts=opts.InitOpts(bg_color="white"))
    .add("", [list(z) for z in zip(city, xqnum)], "北京")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="北京市二手房区分布"),
        visualmap_opts=opts.VisualMapOpts(is_show=True, max_=3000),
    )
)

# Render the chart to HTML and pass render()'s return value to make_snapshot
# to produce the PNG image.
make_snapshot(snapshot, bj_map.render("beiJingMap.html"), "./beiJingMap.png")
可视化展示-北京各城区二手房数量(柱状图)与平均价格(折线图)
# Average listing price per district, rounded to two decimals.
prices = df.groupby("市区")["价格(万元)"].mean().round(2)
price = prices.tolist()

# Bar series: number of listings per district on the primary y axis.
bar = Bar(init_opts=opts.InitOpts(bg_color="white"))
bar.add_xaxis(citys)
bar.add_yaxis("数量", xqnum)

# Secondary y axis (index 1) for the price line, fixed to the 200-900 range.
price_axis = opts.AxisOpts(is_show=True, max_=900, min_=200, name="价格(万元)")
bar.extend_axis(yaxis=price_axis)

bar.set_global_opts(
    yaxis_opts=opts.AxisOpts(name="数量"),
    title_opts=opts.TitleOpts(title="各城区二手房数量-平均价格柱状图"),
    tooltip_opts=opts.TooltipOpts(
        is_show=True,
        trigger="axis",
        axis_pointer_type="cross",
    ),
    xaxis_opts=opts.AxisOpts(
        axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
    ),
)

# Line series: average price, attached to the secondary y axis.
line = Line()
line.add_xaxis(citys)
line.add_yaxis("价格", price, z=10, yaxis_index=1)

# Overlay the line on the bar chart, then render to HTML and snapshot to PNG.
bar.overlap(line)
make_snapshot(snapshot, bar.render("./Mean房价数量.html"), "./Mean房价数量.png")
可视化展示-二手房价格最高的TOP15
# Sort the whole DataFrame by price, highest first. This is done in place,
# so df stays sorted for the code that follows.
df.sort_values(by="价格(万元)", ascending=False, inplace=True)

# Take the 15 most expensive listings. The prices are formatted as
# zero-decimal strings; only the 15 plotted rows are formatted rather than
# the whole column.
top_price = df["价格(万元)"].head(15).apply("{0:.0f}".format).tolist()
xiaoqu = df["小区"].head(15).tolist()

# The series holds prices and the x axis holds neighbourhood names, so they
# are labelled "价格(万元)" and "小区" (both were previously labelled "数量",
# i.e. "count", which matched neither).
top_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(xiaoqu)
    .add_yaxis("价格(万元)", top_price)
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(name="小区"),
        yaxis_opts=opts.AxisOpts(name="价格(万元)"),
    )
)

# Render to HTML, then snapshot the rendered page to PNG.
make_snapshot(snapshot, top_bar.render("./TOP15房价最高.html"), "./TOP15房价最高.png")
装修情况(条形图)-有无电梯(玫瑰图)
# Number of listings per decoration status.
zhuangxiu = df.groupby("装修情况")["装修情况"].agg("count")
x_zx = zhuangxiu.index.tolist()
y_num = zhuangxiu.values.tolist()

# Number of listings per elevator value.
dianti = df.groupby("电梯")["电梯"].agg("count")
# Exclude the "未知" placeholder group by label. Popping the last element
# instead only works when "未知" exists and happens to sort last; otherwise
# it silently removes a real category. errors="ignore" keeps this a no-op
# when there is no "未知" group.
dianti_known = dianti.drop("未知", errors="ignore")
youdt = dianti_known.index.tolist()
dt_num = dianti_known.values.tolist()

# Horizontal bar chart of decoration status: reversal_axis() puts the
# categories on the y axis, and the value labels are placed to the right of
# the bars.
# NOTE(review): orient="scroll" is not a documented LegendOpts orient value
# ("horizontal"/"vertical"); "scroll" looks like it was meant for type_.
# Left unchanged so the rendered output stays the same - confirm intent.
zx_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(x_zx)
    .add_yaxis("", y_num, category_gap="50%")
    .set_global_opts(
        legend_opts=opts.LegendOpts(pos_left='85%', pos_top="63%", orient="scroll"),
        yaxis_opts=opts.AxisOpts(name="装修情况"),
        xaxis_opts=opts.AxisOpts(name="数量"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(position="right"))
    .reversal_axis()
)

# Rose-type ring pie of the elevator counts, centred at (75%, 65%) of the
# canvas; each label shows name, count and percentage.
zx_pie = (
    Pie()
    .add(
        "",
        [list(z) for z in zip(youdt, dt_num)],
        radius=["8%", "25%"],
        rosetype="radius",
        center=["75%", "65%"],
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}\n({d}%)"))
)

# Overlay the pie on the bar chart, then render to HTML and snapshot to PNG.
zx_bar.overlap(zx_pie)
make_snapshot(snapshot, zx_bar.render("./装修情况.html"), "./装修情况.png")
二手房总价与面积(散点图)
# Total price (y) against floor area (x) for every listing.
jg = df["价格(万元)"].tolist()
mj = df["面积(㎡)"].tolist()

scatter = Scatter(init_opts=opts.InitOpts(bg_color="white"))
scatter.add_xaxis(mj)
scatter.add_yaxis("", jg)
# The x axis is declared as a value axis so the areas are plotted on a
# numeric scale.
scatter.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="value", name="面积(㎡)"),
    yaxis_opts=opts.AxisOpts(name="价格(万元)"),
)

# Render to HTML, then snapshot the rendered page to PNG.
make_snapshot(snapshot, scatter.render("./散点图.html"), "./散点图.png")
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 周边上新:园子的第一款马克杯温暖上架
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· DeepSeek如何颠覆传统软件测试?测试工程师会被淘汰吗?
· 使用C#创建一个MCP客户端