混合ガウスモデルの理論
標本 \(\mathcal{X}=\{\boldsymbol{x}_1,\boldsymbol{x}_2,\ldots,\boldsymbol{x}_N\}\) が得られたとき、尤度は
\[
\begin{aligned}
L(\boldsymbol{\theta} \mid \mathcal{X})
&=\prod_{i=1}^Np(\boldsymbol{x}_i;\boldsymbol{\theta}) \\
&=\prod_{i=1}^N\sum_{k=1}^K\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)
\end{aligned}
\]
となります。
対数尤度は
\[
\begin{aligned}
\ell(\boldsymbol{\theta} \mid \mathcal{X})
&=\log L(\boldsymbol{\theta} \mid \mathcal{X}) \\
&=\sum_{i=1}^N\log\sum_{k=1}^K\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)
\end{aligned}
\]
となります。
最尤推定は、対数尤度を混合係数の制約のもとで最大化する次の制約付き最適化問題として定式化されます。
\[
\max_{\{\pi_k,\boldsymbol{\mu}_k,\Sigma_k\}_{k=1}^K}\sum_{i=1}^N\log\sum_{k=1}^K\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k) \quad \text{s.t.} \quad \sum_{k=1}^K\pi_k=1
\]
ラグランジュ関数は
\[
\mathcal{L}(\{\pi_k,\boldsymbol{\mu}_k,\Sigma_k\}_{k=1}^K,\lambda)=\sum_{i=1}^N\log\sum_{k=1}^K\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k) + \lambda\left(\sum_{k=1}^K\pi_k-1\right)
\]
となり、各 \(k=1,\ldots,K\) について次の条件を考えます。
\[
\frac{\partial\mathcal{L}}{\partial\boldsymbol{\mu}_k}=\boldsymbol{0}^\top,\quad \frac{\partial\mathcal{L}}{\partial\Sigma_k}=O,\quad \frac{\partial\mathcal{L}}{\partial\pi_k}=0,\quad \sum_{k=1}^K\pi_k=1
\]
\(\dfrac{\partial\mathcal{L}}{\partial\boldsymbol{\mu}_k}=\boldsymbol{0}^\top\) について（微分する成分の添字 \(k\) と区別するため、和の添字を \(j\) と書きます。また、制約項は \(\boldsymbol{\mu}_k\) に依存しないので微分すると消えます）
\[
\begin{aligned}
\frac{\partial\mathcal{L}}{\partial\boldsymbol{\mu}_k}
&=\frac{\partial}{\partial\boldsymbol{\mu}_k}\sum_{i=1}^N\log\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j) \\
&=\sum_{i=1}^N\frac{\partial}{\partial\boldsymbol{\mu}_k}\log\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j) \\
&=\sum_{i=1}^N \frac{\displaystyle\frac{\partial}{\partial\boldsymbol{\mu}_k}\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j)}{\displaystyle\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j)} \\
&=\sum_{i=1}^N \frac{\pi_k\dfrac{\partial}{\partial\boldsymbol{\mu}_k}\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)}{\displaystyle\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j)}
\end{aligned}
\]
ここで、\(\Sigma_k^{-1}\) が対称行列であることに注意すると
\[
\begin{aligned}
\frac{\partial}{\partial\boldsymbol{\mu}_k} \mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)
&=\frac{\partial}{\partial\boldsymbol{\mu}_k} \frac{1}{\sqrt{(2\pi)^d|\Sigma_k|}}e^{-\frac{1}{2}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)} \\
&=\frac{1}{\sqrt{(2\pi)^d|\Sigma_k|}} \frac{\partial}{\partial\boldsymbol{\mu}_k} e^{-\frac{1}{2}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)} \\
&=\frac{1}{\sqrt{(2\pi)^d|\Sigma_k|}} (\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}e^{-\frac{1}{2}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)} \\
&=\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}
\end{aligned}
\]
なので
\[
\begin{aligned}
\frac{\partial\mathcal{L}}{\partial\boldsymbol{\mu}_k}
&=\sum_{i=1}^N \frac{\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)}{\displaystyle\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j)}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}
\end{aligned}
\]
となります。
\[
r_{ik}:=\frac{\pi_k\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_k,\Sigma_k)}{\displaystyle\sum_{j=1}^K\pi_j\mathcal{N}(\boldsymbol{x}_i \mid \boldsymbol{\mu}_j,\Sigma_j)}
\]
とおくと
\[
\begin{aligned}
\frac{\partial\mathcal{L}}{\partial\boldsymbol{\mu}_k}
&=\sum_{i=1}^N r_{ik}(\boldsymbol{x}_i-\boldsymbol{\mu}_k)^\top\Sigma_k^{-1}
\end{aligned}
\]