# 9.5
import warnings

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
# Model formula: sales explained by three categorical main effects.  The
# variable names must match the DataFrame columns created in build_data().
FORMULA = "sales ~ C(location) + C(advertisement) + C(decoration)"

# Significance level used when interpreting the ANOVA p-values.
ALPHA = 0.05

# Pairs of (row label in the ANOVA table, factor name used in the printed
# message).  anova_lm labels its rows by model term, hence the "C(...)" form.
FACTOR_LABELS = (
    ("C(location)", "地理位置"),
    ("C(advertisement)", "广告"),
    ("C(decoration)", "装潢"),
)


def build_data() -> pd.DataFrame:
    """Return the experiment data as a DataFrame with one row per design cell.

    The factor columns describe 16 rows (4 cities x 2 advertisement levels
    x 2 decoration levels).  The recorded ``sales`` list is longer than
    that, so only the leading values that line up with the design rows are
    used and a warning is emitted about the surplus.

    Raises:
        ValueError: if there are fewer sales values than design rows.
    """
    design = {
        "city": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
        # NOTE(review): city 4 is labelled "A3", the same as city 3 -- this
        # may be a typo for "A4"; kept as in the original data, confirm.
        "location": ["A1"] * 4 + ["A2"] * 4 + ["A3"] * 4 + ["A3"] * 4,
        "advertisement": (["B1"] * 2 + ["B2"] * 2) * 4,
        "decoration": ["C1", "C2"] * 8,
    }
    sales = [
        955, 927, 905, 855, 880, 860, 870, 830,
        875, 870, 870, 821, 967, 949, 930, 860,
        890, 840, 865, 850, 960, 950, 910, 880,
        895, 850, 845, 900, 980, 930, 920, 875,
        900, 830, 860, 855, 848,
    ]

    n_rows = len(design["city"])
    if len(sales) < n_rows:
        raise ValueError(
            f"sales has {len(sales)} values but the design has {n_rows} rows"
        )
    if len(sales) > n_rows:
        # pandas refuses to build a frame from columns of unequal length.
        # NOTE(review): using the first n_rows values is an assumption about
        # which observations belong to the design -- confirm against the
        # source of the data.
        warnings.warn(
            f"sales has {len(sales)} values but the design has {n_rows} "
            f"rows; only the first {n_rows} values are used",
            stacklevel=2,
        )
        sales = sales[:n_rows]

    return pd.DataFrame({**design, "sales": sales})


def fit_anova(data: pd.DataFrame) -> pd.DataFrame:
    """Fit the OLS model given by FORMULA and return its Type II ANOVA table."""
    model = ols(FORMULA, data=data).fit()
    return sm.stats.anova_lm(model, typ=2)


def significance_messages(anova_table: pd.DataFrame, alpha: float = ALPHA) -> list:
    """Return one message per factor saying whether its effect is significant.

    A factor is reported as significant when its ``PR(>F)`` entry in
    ``anova_table`` is strictly below ``alpha``.
    """
    messages = []
    for term, label in FACTOR_LABELS:
        if anova_table.loc[term, "PR(>F)"] < alpha:
            messages.append(f"{label}对销售量有显著影响")
        else:
            messages.append(f"{label}对销售量没有显著影响")
    return messages


def main() -> None:
    """Run the analysis of variance and print the table and conclusions."""
    data = build_data()

    # Perform the analysis of variance.
    anova_table = fit_anova(data)
    print(anova_table)

    # Decide from the p-values whether each factor has a significant effect.
    for message in significance_messages(anova_table):
        print(message)


if __name__ == "__main__":
    main()