线性判别分析(fisher)

线性判别分析

线性判别分析中有降维，把数据都投影到同一条线上，然后在直线上取一个阈值，将直线分成两条射线，每一条代表一个分类。会损失一些数据信息，但如果这些信息是一些干扰信息，丢失也未尝不是好事。

线性判别分析之后的结果是一个向量，其他的不行吗？

主要指导思想(目标)：类内小，类间大。

公式推导

我们要求的是一个投影方向向量，为了方便计算损失，不妨设 $\| \boldsymbol{θ} \| = 1$ ，并把每一个数据 $\boldsymbol{X}_{i}$ 看作一个行向量。那么 $\boldsymbol{X}_{i} \boldsymbol{θ}$ 就是每个数据在 $\boldsymbol{θ}$ 方向上的投影。与 $\boldsymbol{θ}$ 垂直的其中一个平面就是划分平面。

两个不同类别分别命名为 $C_{1}$ 和 $C_{2}$ ，用 $μ μ$ , $μ μ_{C_{1}}$ , $μ μ_{C_{2}}$ 分别代表全部数据， $C_{1}$ 数据, $C_{2}$ 数据的均值，用 $Σ Σ$ , $Σ Σ_{C_{1}}$ , $Σ Σ_{C_{2}}$ 分别代表全部数据， $C_{1}$ 数据, $C_{2}$ 数据的协方差矩阵。
$\tilde{μ}$ 和 ${\tilde{σ}}^{2}$ 表示投影的均值和方差。

$\begin{array}{ccl} \boldsymbol{μ} & = & \frac{1}{N} \sum_{i=1}^{N} \boldsymbol{X}_{i} \\ \boldsymbol{μ}_{C_{1}} & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} \boldsymbol{X}_{C_{1} i} \\ \boldsymbol{μ}_{C_{2}} & = & \frac{1}{N_{C_{2}}} \sum_{i=1}^{N_{C_{2}}} \boldsymbol{X}_{C_{2} i} \\ \boldsymbol{Σ} & = & \frac{1}{N} \sum_{i=1}^{N} (\boldsymbol{X}_{i} - \boldsymbol{μ})^{T} (\boldsymbol{X}_{i} - \boldsymbol{μ}) \\ \boldsymbol{Σ}_{C_{1}} & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}})^{T} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}}) \\ \boldsymbol{Σ}_{C_{2}} & = & \frac{1}{N_{C_{2}}} \sum_{i=1}^{N_{C_{2}}} (\boldsymbol{X}_{C_{2} i} - \boldsymbol{μ}_{C_{2}})^{T} (\boldsymbol{X}_{C_{2} i} - \boldsymbol{μ}_{C_{2}}) \end{array}$

\begin{array}{ccl} \tilde{μ} & = & \frac{1}{N} \sum_{1}^{N} X X_{i} θ θ \\ {\tilde{μ}}_{C_{1}} & = & \frac{1}{N_{C_{1}}} \sum_{1}^{N_{C_{1}}} X X_{C_{1} i} θ θ \\ {\tilde{μ}}_{C_{2}} & = & \frac{1}{N_{C_{2}}} \sum_{1}^{N_{C_{2}}} X X_{C_{2} i} θ θ \\ {\tilde{σ}}^{2} & = & \frac{1}{N} \sum_{1}^{N} (X X_{i} θ θ - \tilde{μ})^{2} \\ {\tilde{σ}}_{C_{1}}^{2} & = & \frac{1}{N_{C_{1}}} \sum_{1}^{N_{C_{1}}} (X X_{C_{1} i} θ θ - {\tilde{μ}}_{C_{1}})^{2} \\ {\tilde{σ}}_{C_{2}}^{2} & = & \frac{1}{N_{C_{2}}} \sum_{1}^{N_{C_{2}}} (X X_{C_{2} i} θ θ - {\tilde{μ}}_{C_{2}})^{2} \end{array}

类间： $({\tilde{μ}}_{C_{1}} - {\tilde{μ}}_{C_{2}})^{2}$

类内： ${\tilde{σ}}_{C_{1}}^{2} + {\tilde{σ}}_{C_{2}}^{2}$

目标函数: $J (θ θ) = \frac{({\tilde{μ}}_{C_{1}} - {\tilde{μ}}_{C_{2}})^{2}}{{\tilde{σ}}_{C_{1}}^{2} + {\tilde{σ}}_{C_{2}}^{2}}$

$\begin{array}{rcl} J (\boldsymbol{θ}) & = & \frac{({\tilde{μ}}_{C_{1}} - {\tilde{μ}}_{C_{2}})^{2}}{{\tilde{σ}}_{C_{1}}^{2} + {\tilde{σ}}_{C_{2}}^{2}} \\ \text{分子} & = & ({\tilde{μ}}_{C_{1}} - {\tilde{μ}}_{C_{2}})^{2} \\ & = & (\frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} \boldsymbol{X}_{C_{1} i} \boldsymbol{θ} - \frac{1}{N_{C_{2}}} \sum_{i=1}^{N_{C_{2}}} \boldsymbol{X}_{C_{2} i} \boldsymbol{θ})^{2} \\ & = & ((\boldsymbol{μ}_{C_{1}} - \boldsymbol{μ}_{C_{2}}) \boldsymbol{θ})^{2} \\ & = & \boldsymbol{θ}^{T} (\boldsymbol{μ}_{C_{1}} - \boldsymbol{μ}_{C_{2}})^{T} (\boldsymbol{μ}_{C_{1}} - \boldsymbol{μ}_{C_{2}}) \boldsymbol{θ} \\ {\tilde{σ}}_{C_{1}}^{2} & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} (\boldsymbol{X}_{C_{1} i} \boldsymbol{θ} - {\tilde{μ}}_{C_{1}})^{2} \\ & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} (\boldsymbol{X}_{C_{1} i} \boldsymbol{θ} - \frac{1}{N_{C_{1}}} \sum_{j=1}^{N_{C_{1}}} \boldsymbol{X}_{C_{1} j} \boldsymbol{θ})^{2} \\ & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} ((\boldsymbol{X}_{C_{1} i} - \frac{1}{N_{C_{1}}} \sum_{j=1}^{N_{C_{1}}} \boldsymbol{X}_{C_{1} j}) \boldsymbol{θ})^{2} \\ & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} ((\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}}) \boldsymbol{θ})^{2} \\ & = & \frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} \boldsymbol{θ}^{T} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}})^{T} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}}) \boldsymbol{θ} \\ & = & \boldsymbol{θ}^{T} (\frac{1}{N_{C_{1}}} \sum_{i=1}^{N_{C_{1}}} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}})^{T} (\boldsymbol{X}_{C_{1} i} - \boldsymbol{μ}_{C_{1}})) \boldsymbol{θ} \\ & = & \boldsymbol{θ}^{T} \boldsymbol{Σ}_{C_{1}} \boldsymbol{θ} \\ {\tilde{σ}}_{C_{2}}^{2} & = & \boldsymbol{θ}^{T} \boldsymbol{Σ}_{C_{2}} \boldsymbol{θ} \\ \text{分母} & = & \boldsymbol{θ}^{T} \boldsymbol{Σ}_{C_{1}} \boldsymbol{θ} + \boldsymbol{θ}^{T} \boldsymbol{Σ}_{C_{2}} \boldsymbol{θ} \\ & = & \boldsymbol{θ}^{T} (\boldsymbol{Σ}_{C_{1}} + \boldsymbol{Σ}_{C_{2}}) \boldsymbol{θ} \end{array}$

$∴$

$\begin{array}{ccl} J (θ θ) & = & \frac{θ θ^{T} (μ μ_{C_{1}} - μ μ_{C_{2}})^{T} (μ μ_{C_{1}} - μ μ_{C_{2}}) θ θ}{θ θ^{T} (Σ Σ_{C_{1}} + Σ Σ_{C_{2}}) θ θ} \end{array}$

设 $S_{b} = (μ μ_{C_{1}} - μ μ_{C_{2}})^{T} (μ μ_{C_{1}} - μ μ_{C_{2}})$ , $S_{w} = Σ Σ_{C_{1}} + Σ Σ_{C_{2}}$

$S_{b}$ 就是类间方差（between-class，两类均值之差的散度）

$S_{w}$ 就是类内方差（within-class，两类各自协方差之和）

此时 $J (θ θ) = \frac{θ θ^{T} S_{b} θ θ}{θ θ^{T} S_{w} θ θ}$

求导

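$\begin{array}{rcl} \frac{\partial J (\boldsymbol{θ})}{\partial \boldsymbol{θ}} & = & \frac{\partial \frac{\boldsymbol{θ}^{T} \boldsymbol{S}_{b} \boldsymbol{θ}}{\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ}}}{\partial \boldsymbol{θ}} \\ & = & \frac{\partial (\boldsymbol{θ}^{T} \boldsymbol{S}_{b} \boldsymbol{θ} (\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ})^{- 1})}{\partial \boldsymbol{θ}} \\ & = & \frac{\partial (\boldsymbol{θ}^{T} \boldsymbol{S}_{b} \boldsymbol{θ})}{\partial \boldsymbol{θ}} (\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ})^{- 1} + \boldsymbol{θ}^{T} \boldsymbol{S}_{b} \boldsymbol{θ} \frac{\partial ((\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ})^{- 1})}{\partial \boldsymbol{θ}} \\ & = & 2 \boldsymbol{S}_{b} \boldsymbol{θ} (\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ})^{- 1} + \boldsymbol{θ}^{T} \boldsymbol{S}_{b} \boldsymbol{θ} (- \frac{1}{(\boldsymbol{θ}^{T} \boldsymbol{S}_{w} \boldsymbol{θ})^{2}}) (2 \boldsymbol{S}_{w} \boldsymbol{θ}) \end{array}$

（这里用到 $\boldsymbol{S}_{b}$ 、 $\boldsymbol{S}_{w}$ 都是对称矩阵，所以 $\frac{\partial (\boldsymbol{θ}^{T} \boldsymbol{S} \boldsymbol{θ})}{\partial \boldsymbol{θ}} = 2 \boldsymbol{S} \boldsymbol{θ}$ 。）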

令导数等于零

\begin{array}{rcl} 0 0 & = & 2 S S_{b} θ θ (θ θ^{T} S S_{w} θ θ)^{- 1} + θ θ^{T} S S_{b} θ θ (- \frac{1}{(θ θ^{T} S S_{w} θ θ)^{2}}) (2 S S_{w} θ θ) \\ 2 S S_{b} θ θ (θ θ^{T} S S_{w} θ θ)^{- 1} & = & θ θ^{T} S S_{b} θ θ (\frac{1}{(θ θ^{T} S S_{w} θ θ)^{2}}) (2 S S_{w} θ θ) \\ S S_{b} θ θ (θ θ^{T} S S_{w} θ θ) & = & (θ θ^{T} S S_{b} θ θ) S S_{w} θ θ \\ (θ θ^{T} S S_{b} θ θ) S S_{w} θ θ & = & S S_{b} θ θ (θ θ^{T} S S_{w} θ θ) \\ θ θ & = & S S_{w}^{- 1} \frac{θ θ^{T} S S_{w} θ θ}{θ θ^{T} S S_{b} θ θ} S S_{b} θ θ \\ θ θ & = & S S_{w}^{- 1} \frac{θ θ^{T} S S_{w} θ θ}{θ θ^{T} S S_{b} θ θ} (μ μ_{C_{1}} - μ μ_{C_{2}})^{T} (μ μ_{C_{1}} - μ μ_{C_{2}}) θ θ \end{array}

$∵$
$\frac{θ θ^{T} S S_{w} θ θ}{θ θ^{T} S S_{b} θ θ}$ , $(μ μ_{C_{1}} - μ μ_{C_{2}}) θ θ$ 是一个数，不影响 $θ θ$ 的方向

$∴$
$θ θ \propto S S_{w}^{- 1} (μ μ_{C_{1}} - μ μ_{C_{2}})^{T}$

若 $\boldsymbol{S}_{w} \propto \boldsymbol{I}$ （类内协方差各向同性），则

$θ θ \propto (μ μ_{C_{1}} - μ μ_{C_{2}})^{T}$

求任意一个点的投影

$p r o j_{θ θ} (x) = x^{T} θ θ$

求阈值

$\begin{array}{rcl} \mathrm{threshold} & = & \frac{N_{C_{1}} {\tilde{μ}}_{C_{1}} + N_{C_{2}} {\tilde{μ}}_{C_{2}}}{N_{C_{1}} + N_{C_{2}}} \\ & = & \frac{N_{C_{1}} \boldsymbol{μ}_{C_{1}} \boldsymbol{θ} + N_{C_{2}} \boldsymbol{μ}_{C_{2}} \boldsymbol{θ}}{N_{C_{1}} + N_{C_{2}}} \end{array}$

依赖

 import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

人工数据集

 # Build a two-class toy data set: the first n//2 rows are drawn around (1, 1)
 # and labelled +1, the remaining n//2 rows are drawn around (3, 3) and
 # labelled -1. The class size is derived from n so that the labels and the
 # plotting slices stay in sync when n is changed.
 n = 100
 half = n // 2
 cov = [[0.64, 0], [0, 0.64]]
 X = np.vstack([
     np.random.multivariate_normal((1, 1), cov, size=half),
     np.random.multivariate_normal((3, 3), cov, size=half),
 ])
 #X = np.insert(X, 0, 1, 1)
 m = X.shape[1]
 y = np.array([1]*half + [-1]*half).reshape(-1, 1)
 # Scatter the two classes in different colours.
 plt.scatter(X[:half, -2], X[:half, -1])
 plt.scatter(X[half:, -2], X[half:, -1], c = "#ff4400")

 <matplotlib.collections.PathCollection at 0x7f2b50e680d0>

 # Split the samples by label and compute per-class counts, means (as column
 # vectors) and covariance matrices.
 X1 = X[(y==1).reshape(-1)]
 X0 = X[(y==-1).reshape(-1)]
 n1 = np.array([[X1.shape[0]]])
 n0 = np.array([[X0.shape[0]]])
 mu1 = X1.mean(axis = 0).reshape(-1,1)
 mu0 = X0.mean(axis = 0).reshape(-1,1)
 Sigma1 = np.cov(X1.T)
 Sigma0 = np.cov(X0.T)
 # Fisher direction: theta is proportional to S_w^{-1} (mu1 - mu0) with
 # S_w = Sigma1 + Sigma0. Solve the linear system rather than multiplying by
 # S_w itself (the previous code omitted the inverse) or forming the inverse.
 theta = np.linalg.solve(Sigma1 + Sigma0, mu1 - mu0)
 # Threshold: size-weighted average of the two projected class means.
 threshold = (n1*mu1 + n0*mu0).T@theta/(n1 + n0)
 def getForecast(x):
     """Return the projection of column vector(s) ``x`` onto ``theta``.

     ``x`` holds one sample per column; the result has one row per sample.
     """
     return x.T @ theta
 threshold

 array([[-10.45793931]])

预测

 # Classify the point (1, 1): print 1 when its projection lies above the
 # threshold, 0 otherwise.
 sample = np.array([[1], [1]])
 label = 1 if getForecast(sample) > threshold else 0
 print(f'{label}')

分界展示

 plt.scatter(X[:50, -2], X[:50, -1])
 plt.scatter(X[50:, -2], X[50:, -1], c = "#ff4400")
 # Mark every grid point whose projection is within 0.01 of the threshold.
 # The whole grid is projected in one call and drawn with one scatter call,
 # instead of one getForecast/scatter call per grid point.
 ticks = np.arange(-1,5,0.02)
 grid_i, grid_j = np.meshgrid(ticks, ticks, indexing="ij")
 grid = np.vstack([grid_i.ravel(), grid_j.ravel()])  # one grid point per column
 near = (abs(getForecast(grid) - threshold) < 0.01).reshape(-1)
 plt.scatter(grid[0, near], grid[1, near], c="#000000")

posted @ 2022-09-29 20:07 孑然520 阅读(371) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 局部加权线性回归

· 有关损失函数推导

· PRML-4.1 判别函数

· Fisher线性判别分析

· 线性判别分析（LDA）模型笔记

阅读排行：
· TypeScript + Deepseek 打造卜卦网站：技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· C#/.NET/.NET Core技术前沿周刊 | 第 29 期（2025年3.1-3.9）
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	# Build a two-class toy data set: the first n//2 rows are drawn around (1, 1)
	# and labelled +1, the remaining n//2 rows are drawn around (3, 3) and
	# labelled -1. The class size is derived from n so that the labels and the
	# plotting slices stay in sync when n is changed.
	n = 100
	half = n // 2
	cov = [[0.64, 0], [0, 0.64]]
	X = np.vstack([
	    np.random.multivariate_normal((1, 1), cov, size=half),
	    np.random.multivariate_normal((3, 3), cov, size=half),
	])
	#X = np.insert(X, 0, 1, 1)
	m = X.shape[1]
	# The list-repetition operators (*) were missing here, which is a syntax error.
	y = np.array([1]*half + [-1]*half).reshape(-1, 1)
	plt.scatter(X[:half, -2], X[:half, -1])
	plt.scatter(X[half:, -2], X[half:, -1], c = "#ff4400")

	# Split the samples by label and compute per-class counts, means (as column
	# vectors) and covariance matrices.
	X1 = X[(y==1).reshape(-1)]
	X0 = X[(y==-1).reshape(-1)]
	n1 = np.array([[X1.shape[0]]])
	n0 = np.array([[X0.shape[0]]])
	mu1 = X1.mean(axis = 0).reshape(-1,1)
	mu0 = X0.mean(axis = 0).reshape(-1,1)
	Sigma1 = np.cov(X1.T)
	Sigma0 = np.cov(X0.T)
	# Fisher direction: theta is proportional to S_w^{-1} (mu1 - mu0) with
	# S_w = Sigma1 + Sigma0, so solve the linear system instead of multiplying
	# by S_w itself.
	theta = np.linalg.solve(Sigma1 + Sigma0, mu1 - mu0)
	# Threshold: size-weighted average of the two projected class means
	# (the multiplication operators were missing in n1*mu1 and n0*mu0).
	threshold = (n1*mu1 + n0*mu0).T@theta/(n1 + n0)
	def getForecast(x):
	    """Return the projection of column vector(s) ``x`` onto ``theta``.

	    ``x`` holds one sample per column; the result has one row per sample.
	    """
	    return x.T @ theta
	threshold

	plt.scatter(X[:50, -2], X[:50, -1])
	plt.scatter(X[50:, -2], X[50:, -1], c = "#ff4400")
	# Mark every grid point whose projection is within 0.01 of the threshold.
	# The nested loops had lost their indentation (invalid Python); the grid is
	# now projected in one call and drawn with a single scatter call.
	ticks = np.arange(-1,5,0.02)
	grid_i, grid_j = np.meshgrid(ticks, ticks, indexing="ij")
	grid = np.vstack([grid_i.ravel(), grid_j.ravel()])  # one grid point per column
	near = (abs(getForecast(grid) - threshold) < 0.01).reshape(-1)
	plt.scatter(grid[0, near], grid[1, near], c="#000000")

集思摘