贝叶斯网络——看来我要的是参数估计
Python的贝叶斯网络学习库pgmpy介绍和使用
pgmpy
Parameter learning: Given a set of data samples and a DAG that captures the dependencies between the variables, estimate the (conditional) probability distributions of the individual variables.
Structure learning: Given a set of data samples, estimate a DAG that captures the dependencies between the variables.
pgmpy.org
github.com/pgmpy/pgmpy_notebook/blob/master/notebooks
代码记录
包
"""
学习链接 :
http://pgmpy.org/
https://github.com/pgmpy/pgmpy_notebook/blob/master/notebooks/9.%20Learning%20Bayesian%20Networks%20from%20Data.ipynb
"""
# ====================BN模型=========================
# 贝叶斯模型
from pgmpy.models import BayesianModel
# ====================参数学习=========================
# 参数估计
from pgmpy.estimators import ParameterEstimator
# MLE参数估计
from pgmpy.estimators import MaximumLikelihoodEstimator
# Bayesian参数估计
from pgmpy.estimators import BayesianEstimator
# ====================结构学习=========================
# ========评分搜索=========================
# 评分
from pgmpy.estimators import BdeuScore, K2Score, BicScore
# 穷举搜索
from pgmpy.estimators import ExhaustiveSearch
# 爬山搜索
from pgmpy.estimators import HillClimbSearch
# ======== 约束 =========================
from pgmpy.estimators import ConstraintBasedEstimator
# 独立性
from pgmpy.independencies import Independencies
# ======== 混合 =========================
from pgmpy.estimators import MmhcEstimator
# ==================== 通用库 =========================
import pandas as pd
import numpy as np
Parameter learning
def parameterLearning():
data = pd.DataFrame(data={'fruit': ["banana", "apple", "banana", "apple", "banana","apple", "banana",
"apple", "apple", "apple", "banana", "banana", "apple", "banana",],
'tasty': ["yes", "no", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "no", "no", "no"],
'size': ["large", "large", "large", "small", "large", "large", "large",
"small", "large", "large", "large", "large", "small", "small"]})
model = BayesianModel([('fruit', 'tasty'), ('size', 'tasty')]) # fruit -> tasty <- size
print("========================================================")
pe = ParameterEstimator(model, data)
print("\n", pe.state_counts('fruit')) # unconditional
print("\n", pe.state_counts('size')) # unconditional
print("\n", pe.state_counts('tasty')) # conditional on fruit and size
print("========================================================")
mle = MaximumLikelihoodEstimator(model, data)
print(mle.estimate_cpd('fruit')) # unconditional
print(mle.estimate_cpd('tasty')) # conditional
print("========================================================")
est = BayesianEstimator(model, data)
print(est.estimate_cpd('tasty', prior_type='BDeu', equivalent_sample_size=10))
# Setting equivalent_sample_size to 10 means
# that for each parent configuration, we add the equivalent of 10 uniform samples
# (here: +5 small bananas that are tasty and +5 that aren't).
print("========================================================")
# Calibrate all CPDs of `model` using MLE:
model.fit(data, estimator=MaximumLikelihoodEstimator)
print("========================================================")
# generate data
data = pd.DataFrame(np.random.randint(low=0, high=2, size=(5000,