import pandas as pd

# Load the orthodontic measurements workbook.
file_path = "./Ortho_data.xlsx"
df = pd.read_excel(file_path)

# Peek at the first rows to check the columns parsed as expected.
print(df.head())

# The table stores 4 consecutive rows (one per age) per individual, so
# sampling every 4th row yields exactly one label per subject.
step = 4
Individu = df['Individu'].iloc[::step].str.strip().tolist()
Sex = df['Sex'].iloc[::step].str.strip().tolist()
Distance Age Individu Sex 0 26.0 8 M01 Male 1 25.0 10 M01 Male 2 29.0 12 M01 Male 3 31.0 14 M01 Male 4 21.5 8 M02 Male
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
## 0. Penalty weight lambda, fixed for the whole EM run.
lbd = 0.001

## 1. Initialisation
# Response matrix: one row per individual, J = 4 repeated measures each.
y_ij = np.array(df['Distance'].values.reshape(-1, 4))
I, J = y_ij.shape          # I individuals, J time points per individual
n = I                      # number of subjects (denominator of sigma^2(0))
Y = y_ij.reshape(-1, 1)    # all responses stacked into one column vector

# Fixed-effects design: intercept + age, identical for every individual.
t_j = np.array([8, 10, 12, 14])
X_i = np.column_stack((np.ones(len(t_j)), t_j))
X = np.vstack([X_i] * I)   # full (N x 2) design
N = X.shape[0]             # N = I * J

# Random-effects design: same per-individual matrix (random intercept + slope).
Z_i = np.column_stack((np.ones(len(t_j)), t_j))

# 1.1 Initialise beta with a LASSO fit:
#   beta^(0) = argmin_beta { (Y - X beta)'(Y - X beta) + lambda ||beta||_1 }
# sklearn's `alpha` plays the role of lambda.
lasso = Lasso(alpha=lbd, fit_intercept=False)
lasso.fit(X, Y.ravel())
beta_0 = lasso.coef_.reshape(-1, 1)
matrix_beta = [beta_0]

# 1.2 sigma^2(0) = (1/n) (Y - X beta^(0))'(Y - X beta^(0))
# Stored as a (1, 1) array; downstream code reads it with [0][0]/.item().
residual_0 = Y - X @ beta_0
sigma2_0 = (1 / n) * (residual_0.T @ residual_0)
matrix_sigma2 = [sigma2_0]

# 1.3 Random effects b_i start at zero (stacked (2I x 1) vector).
b_i_0 = np.array([[0], [0]])
b_0 = np.vstack([b_i_0] * I)
matrix_b = [b_0]

# 1.4 D: covariance matrix of b_i, initialised to the identity.
D_0 = np.eye(2)
matrix_D = [D_0]

K_max = 100  # maximum number of EM iterations
# EM iterations. (The pasted original had lost the loop-body indentation,
# restored here.)
for k in range(1, K_max):
    # --- 2. E-Step ---
    # With current (beta, sigma^2, D), compute per individual:
    #   y_tilde_i = y_i - Z_i b_i                         (partial residual)
    #   Lambda_i  = (D^{-1} + Z_i' Z_i / sigma^2)^{-1}    (posterior cov of b_i)
    #   b_i       = posterior mean of the random effects
    #   lambda_1  = 2 * lambda * sigma^2                  (rescaled penalty)

    # 2.1 y_tilde_i^(k) = y_i - Z_i b_i^(k-1), then stack into one column.
    y_tilde_i_k = [
        y_ij[i, :].reshape(-1, 1) - Z_i @ matrix_b[-1][2 * i:2 * i + 2, :]
        for i in range(I)
    ]
    y_tilde_k = np.stack(y_tilde_i_k).reshape(-1, 1)

    # 2.2 Lambda_i^(k) = (D^{-1} + Z_i' Z_i / sigma^2)^{-1}.
    # It is the same matrix for every i because Z_i is shared.
    Lambda_i_k = np.linalg.inv(
        np.linalg.inv(matrix_D[-1]) + (Z_i.T @ Z_i) / matrix_sigma2[-1]
    )

    # 2.3 b_i^(k): posterior mean, written in its equivalent marginal form
    #   D Z' (Z D Z' + sigma^2 I)^{-1} (y_tilde_i - X_i beta).
    b_i_k = [
        matrix_D[-1] @ Z_i.T
        @ np.linalg.inv(Z_i @ matrix_D[-1] @ Z_i.T
                        + matrix_sigma2[-1] * np.eye(J))
        @ (y_tilde_k[J * i:J * (i + 1), :] - X_i @ matrix_beta[-1])
        for i in range(I)
    ]
    b_k = np.vstack(b_i_k)

    # 2.4 lambda_1^(k) = 2 * lambda * sigma^{2(k-1)}  -- a (1, 1) array.
    lbd1_k = 2 * lbd * matrix_sigma2[-1]

    # Store updated b.
    matrix_b.append(b_k)

    ## --- 3. M-Step ---
    # 3.1 beta^(k+1) minimises
    #   (y_tilde - X beta)'(y_tilde - X beta) + lambda_1 * Psi_p^alpha(beta)
    # with an Elastic Net penalty replacing the plain LASSO.
    ratio_l1l2 = 0.5  # Elastic Net L1/L2 mixing ratio (0 < ratio < 1)
    # BUGFIX: `alpha` must be a scalar; lbd1_k is a (1, 1) ndarray.
    elastic_net = ElasticNet(alpha=lbd1_k.item(), l1_ratio=ratio_l1l2,
                             fit_intercept=False)
    elastic_net.fit(X, y_tilde_k.ravel())
    beta_kadd1 = elastic_net.coef_.reshape(-1, 1)

    # 3.2 sigma^{2(k+1)} = (RSS + sum_i tr(Z_i Lambda_i Z_i')) / N
    resid_k = y_tilde_k - X @ matrix_beta[-1]
    sigma2_k = (1 / N) * (
        resid_k.T @ resid_k
        + sum(np.trace(Z_i @ Lambda_i_k @ Z_i.T) for i in range(I))
    )

    # 3.3 D^{(k+1)} = (1/I) * sum_i (b_i b_i' + Lambda_i)
    D_k = (1 / I) * np.sum(
        [b_i_k[i] @ b_i_k[i].T + Lambda_i_k for i in range(I)], axis=0
    )

    # Store the updated parameters for the next iteration.
    matrix_beta.append(beta_kadd1)
    matrix_sigma2.append(sigma2_k)
    matrix_D.append(D_k)

# Final EM estimates.
print(matrix_beta[-1])
print(matrix_D[-1])
print(matrix_sigma2[-1])
[[15.06165971] [ 0.80693224]] [[ 0.58343463 -0.01490246] [-0.01490246 0.00672141]] [[4.24344641]]
from scipy.linalg import block_diag

# Assemble the stacked mixed-model matrices covering all I individuals.
U = block_diag(*([X_i] * I))          # block-diagonal random-effects design
I_J = np.eye(N)                       # N = I * J identity
B = np.hstack([U, I_J])

# Plug in the final EM estimates.
D_estimate = matrix_D[-1]
G_estimate = block_diag(*([D_estimate] * I))
sigma2_estimate = matrix_sigma2[-1][0][0]
R_estimate = sigma2_estimate * np.eye(N)

# Marginal covariance of Y: V = B W B' = U G U' + R.
W = block_diag(G_estimate, R_estimate)
V = B @ W @ B.T

# Fitted values: fixed part X beta plus the random-effect prediction.
# NOTE(review): the textbook BLUP is +G U' V^{-1} (Y - X beta); the minus
# sign below follows the accompanying derivation in this document — confirm.
beta_estimate = matrix_beta[-1]
V_inv = np.linalg.inv(V)
random_part = -G_estimate @ U.T @ V_inv @ (Y - X @ beta_estimate)
Y_fit = X @ beta_estimate + U @ random_part
y_fit_ij = Y_fit.reshape(-1, 4)

print("y_ij = \n", y_ij)
print("y_fit_ij = \n", y_fit_ij)
y_ij = [[26. 25. 29. 31. ] [21.5 22.5 23. 26.5] [23. 22.5 24. 27.5] [25.5 27.5 26.5 27. ] [20. 23.5 22.5 26. ] [24.5 25.5 27. 28.5] [22. 22. 24.5 26.5] [24. 21.5 24.5 25.5] [23. 20.5 31. 26. ] [27.5 28. 31. 31.5] [23. 23. 23.5 25. ] [21.5 23.5 24. 28. ] [17. 24.5 26. 29.5] [22.5 25.5 25.5 26. ] [23. 24.5 26. 30. ] [22. 21.5 23.5 25. ] [21. 20. 21.5 23. ] [21. 21.5 24. 25.5] [20.5 24. 24.5 26. ] [23.5 24.5 25. 26.5] [21.5 23. 22.5 23.5] [20. 21. 21. 22.5] [21.5 22.5 23. 25. ] [23. 23. 23.5 24. ] [20. 21. 22. 21.5] [16.5 19. 19. 19.5] [24.5 25. 28. 28. ]] y_fit_ij = [[19.91846679 21.31924968 22.72003258 24.12081548] [21.75388733 23.39972249 25.04555766 26.69139283] [21.39071108 22.9903445 24.58997792 26.18961134] [20.43797776 21.93594667 23.43391558 24.93188449] [21.90534729 23.56797388 25.23060047 26.89322706] [20.50964208 21.99773999 23.4858379 24.97393581] [21.59591426 23.22027683 24.84463939 26.46900196] [21.57148003 23.20897016 24.84646028 26.48395041] [21.01125207 22.5529808 24.09470954 25.63643827] [19.20188396 20.51728512 21.83268628 23.14808745] [21.67896645 23.33233203 24.98569761 26.63906319] [21.37442832 22.9623588 24.55028928 26.13821976] [21.31255379 22.85601312 24.39947244 25.94293177] [21.14479092 22.72111981 24.29744869 25.87377758] [20.68879282 22.18289518 23.67699755 25.17109991] [21.92488662 23.60155673 25.27822684 26.95489695] [22.61377866 24.38661748 26.1594563 27.93229512] [21.90860385 23.57357102 25.2385382 26.90350537] [21.5926577 23.21467968 24.83670167 26.45872365] [21.14804747 22.72671695 24.30538642 25.88405589] [22.09914246 23.8089881 25.51883375 27.22867939] [22.71800853 24.50438221 26.2907559 28.07712959] [21.92488662 23.60155673 25.27822684 26.95489695] [21.79296598 23.46688819 25.1408104 26.81473261] [22.72452163 24.5155765 26.30663136 28.09768622] [23.80753726 25.73251619 27.65749513 29.58247406] [20.50964208 21.99773999 23.4858379 24.97393581]]
$$ \mathbf{y}_i = \begin{bmatrix} y_{i1} \\ \vdots \\ y_{iJ} \end{bmatrix} = A \begin{bmatrix} \beta_0 \\ \beta_1 \end{bmatrix} + A \begin{bmatrix} \beta_{0r, i} \\ \beta_{1r, i} \end{bmatrix} + \begin{bmatrix} \epsilon_{i1} \\ \vdots \\ \epsilon_{iJ} \end{bmatrix} $$
with
$$ A = \begin{bmatrix} 1 & 8 \\ 1 & 10 \\ 1 & 12 \\ 1 & 14 \\ \end{bmatrix} $$
after EM we get $$ \hat{\sigma}_{0r}^2, ~ \hat{\sigma}_{1r}^2, ~ \hat{\rho}_{0r, 1r}, ~ \hat{\sigma}^2 $$
if we compute, $\forall i \in [1, 2, \ldots, I]$ $$ B = \begin{bmatrix} A & \mathbb{I}_{J\times J} \end{bmatrix} = \begin{bmatrix} 1 & 8 & 1 & 0 & 0 & 0\\ 1 & 10 & 0 & 1 & 0 & 0\\ 1 & 12 & 0 & 0 & 1 & 0 \\ 1 & 14 & 0 & 0 & 0 & 1 \\ \end{bmatrix} $$ $$ \hat{G_i} = \hat{D} = \begin{bmatrix} \hat{\sigma}_{0r}^2 & \hat{\rho}_{0r, 1r} \\ \hat{\rho}_{0r, 1r} & \hat{\sigma}_{1r}^2 \end{bmatrix} $$
$$ \hat{R_i} = \hat{\sigma}^2 \mathbb{I}_{J \times J} $$
$$ \hat{W_i} = \begin{bmatrix} \hat{G_i} & \mathbf{0} \\ \mathbf{0} & \hat{R_i} \end{bmatrix} $$
$$ \hat{V}_i = B \hat{W}_i B^T $$
Consider $$ \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} = (A^T {\hat{V}_i}^{-1} A)^{-1} A^{T} {\hat{V}_i}^{-1} \mathbf{y}_i $$
in our case $$ \hat{V}_i \simeq k \mathbb{I}_{J\times J} $$
thus
$$ \begin{align*} \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} &\simeq (A^T ~ \frac{1}{k} \mathbb{I}_{J\times J} ~ A)^{-1} A^{T} ~ \frac{1}{k} \mathbb{I}_{J\times J} ~ \mathbf{y}_i \\ &= (A^T A)^{-1} A^{T} ~ \mathbf{y}_i \\ &= A^{+} \mathbf{y}_i \end{align*} $$
where $(\cdot)^{+}$ is the Moore-Penrose Pseudoinverse
Consider $$ \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} = - \hat{G_i} A^T \hat{V}_i^{-1} (\mathbf{y}_i - A \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} ) $$
in our case $$ \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} \simeq \begin{bmatrix} 0 \\ 0 \end{bmatrix} $$
$$ \begin{align*} \Rightarrow \mathbf{y}_{fit ~ i} &= A \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} + A \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} \\ &\simeq A A^{+} \mathbf{y}_i \end{align*} $$
# Per-individual BLUP fit using the single-subject (J x J) system.
A = X_i
I_J = np.diag([1, 1, 1, 1])          # J = 4 identity
B = np.hstack([A, I_J])
G_estimate = matrix_D[-1]
sigma2_estimate = matrix_sigma2[-1][0][0]
R_estimate = np.eye(J, J) * sigma2_estimate
W = np.block([
    [G_estimate, np.zeros((2, J))],  # [ G   0 ]
    [np.zeros((J, 2)), R_estimate],  # [ 0   R ]
])
V = B @ W @ B.T                      # per-individual marginal covariance

# Hoist the inverse: V is identical for every individual, so invert once
# (the original recomputed np.linalg.inv(V) inside the comprehension).
V_inv = np.linalg.inv(V)

# Diagnostics: V^{-1} is close to a scaled identity, so the GLS estimator
# (A' V^{-1} A)^{-1} A' V^{-1} collapses to the Moore-Penrose pseudo-inverse.
print(V_inv * 10)
print(np.linalg.inv(A.T @ A) @ A.T)
print(np.linalg.inv(A.T @ V_inv @ A) @ A.T @ V_inv)

'''begin test : for one line in y_ij, with line in [1, I]'''
line = 27
y_line = y_ij[line - 1, :].reshape(-1, 1)
beta_estimate_line = np.linalg.inv(A.T @ V_inv @ A) @ A.T @ V_inv @ y_line
# The random-effect correction is numerically ~0 here because V^{-1} is
# approximately proportional to the identity.
correction_line = -G_estimate @ A.T @ V_inv @ (y_line - A @ beta_estimate_line)
y_fit_line = A @ beta_estimate_line + A @ correction_line
print("here", correction_line)
print(f"y_{line} = \n ", y_line)
print(f"y_fit_{line} = \n ", y_fit_line)
'''end test'''

# Fit all I individuals (0-based loop instead of the original 1-based one).
_fit_rows = []
for i in range(I):
    y_i = y_ij[i, :].reshape(-1, 1)
    beta_i = np.linalg.inv(A.T @ V_inv @ A) @ A.T @ V_inv @ y_i
    corr_i = -G_estimate @ A.T @ V_inv @ (y_i - A @ beta_i)
    _fit_rows.append((A @ beta_i + A @ corr_i).T)
y_fit_ij = np.vstack(_fit_rows)

print("y_ij = \n ", y_ij)
print("y_fit_ij = \n ", y_fit_ij)

# NOTE(review): the correction term A @ (-G A' V^{-1} (y_i - A beta_i)) is
# ~0, so A @ (A' V^{-1} A)^{-1} A' V^{-1} y_i alone already reproduces the
# fitted values for each individual.
[[ 2.13397251 -0.23795118 -0.25329981 -0.26864844] [-0.23795118 2.09224372 -0.29071151 -0.31709168] [-0.25329981 -0.29071151 2.02845185 -0.36553492] [-0.26864844 -0.31709168 -0.36553492 1.9425969 ]] [[ 1.9 0.8 -0.3 -1.4 ] [-0.15 -0.05 0.05 0.15]] [[ 1.9 0.8 -0.3 -1.4 ] [-0.15 -0.05 0.05 0.15]] here [[-3.51108032e-15] [-7.28583860e-16]] y_27 = [[24.5] [25. ] [28. ] [28. ]] y_fit_27 = [[24.35] [25.7 ] [27.05] [28.4 ]] y_ij = [[26. 25. 29. 31. ] [21.5 22.5 23. 26.5] [23. 22.5 24. 27.5] [25.5 27.5 26.5 27. ] [20. 23.5 22.5 26. ] [24.5 25.5 27. 28.5] [22. 22. 24.5 26.5] [24. 21.5 24.5 25.5] [23. 20.5 31. 26. ] [27.5 28. 31. 31.5] [23. 23. 23.5 25. ] [21.5 23.5 24. 28. ] [17. 24.5 26. 29.5] [22.5 25.5 25.5 26. ] [23. 24.5 26. 30. ] [22. 21.5 23.5 25. ] [21. 20. 21.5 23. ] [21. 21.5 24. 25.5] [20.5 24. 24.5 26. ] [23.5 24.5 25. 26.5] [21.5 23. 22.5 23.5] [20. 21. 21. 22.5] [21.5 22.5 23. 25. ] [23. 23. 23.5 24. ] [20. 21. 22. 21.5] [16.5 19. 19. 19.5] [24.5 25. 28. 28. ]] y_fit_ij = [[24.9 26.8 28.7 30.6 ] [21.05 22.6 24.15 25.7 ] [22. 23.5 25. 26.5 ] [26.1 26.45 26.8 27.15] [20.45 22.15 23.85 25.55] [24.35 25.7 27.05 28.4 ] [21.35 22.95 24.55 26.15] [22.75 23.5 24.25 25. ] [22.2 24.15 26.1 28.05] [27.25 28.75 30.25 31.75] [22.65 23.3 23.95 24.6 ] [21.25 23.25 25.25 27.25] [18.4 22.3 26.2 30.1 ] [23.3 24.35 25.4 26.45] [22.5 24.75 27. 29.25] [21.35 22.45 23.55 24.65] [20.25 21. 21.75 22.5 ] [20.6 22.2 23.8 25.4 ] [21.2 22.9 24.6 26.3 ] [23.45 24.4 25.35 26.3 ] [21.8 22.35 22.9 23.45] [20. 20.75 21.5 22.25] [21.35 22.45 23.55 24.65] [22.85 23.2 23.55 23.9 ] [20.3 20.85 21.4 21.95] [17.15 18.05 18.95 19.85] [24.35 25.7 27.05 28.4 ]]
'\nproblem is that \nprint(A @ (- G_estimate @ A.T @ np.linalg.inv(V) @ (y_ij[line-1,:].reshape(-1,1) - A @ beta_estimate_line)))\nis around 0 !\nit means\nA @ (np.linalg.inv(A.T @ np.linalg.inv(V) @ A ) @ A.T @ np.linalg.inv(V) @ y_ij[line-1,:].reshape(-1,1))\nalready gives the fit result for individuel = line\n'
import numpy as np
import matplotlib.pyplot as plt
# One label per individual (4 rows per subject in the raw table).
Individual = df['Individu'][::4].values
Sex = df['Sex'][::4].values
t_j = np.array([8, 10, 12, 14])  # time points (age in years)
(I, J) = np.shape(y_ij)

# Plot every m-th individual.
m = 1  # interval for subplot selection
selected_indices = np.arange(0, I, m)
num_selected = len(selected_indices)

# BUGFIX: use ceil division for the row count. The old formula
# (num_selected + 1) // ncols_ under-allocated rows (e.g. 5 panels with
# 4 columns gave a single row and an IndexError on axes[4]).
ncols_ = 4
nrows_ = (num_selected + ncols_ - 1) // ncols_
fig, axes = plt.subplots(nrows=nrows_, ncols=ncols_,
                         figsize=(16, 4 * nrows_))
axes = np.atleast_1d(axes).flatten()  # uniform 1-D indexing for any grid

for idx, i in enumerate(selected_indices):
    ax = axes[idx]
    label = f'{Individual[i]} ({Sex[i]})'
    # Observed measurements as points, fitted trajectory as a line.
    ax.scatter(t_j, y_ij[i], color='red', label=f'{label} Observed',
               marker='o')
    ax.plot(t_j, y_fit_ij[i], color='blue', label=f'{label} Fitted',
            linestyle='-', marker='s')
    ax.set_xlabel('t_j (Age)')
    ax.set_ylabel('Distance')
    ax.set_title(f'Individual: {label}')
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.7)

# Hide any unused trailing axes so the figure has no empty frames.
for ax in axes[num_selected:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()