In [24]:
import pandas as pd

# Read the orthodontic growth measurements from the Excel workbook.
file_path = "./Ortho_data.xlsx"
df = pd.read_excel(file_path)

# Quick sanity check of the raw table.
print(df.head())

# The sheet stores J = 4 repeated measurements per subject, so the subject id
# and sex repeat on 4 consecutive rows: keep a single (stripped) value each.
Individu = df['Individu'].iloc[::4].str.strip().tolist()
Sex = df['Sex'].iloc[::4].str.strip().tolist()
   Distance  Age Individu      Sex
0      26.0    8      M01     Male
1      25.0   10      M01  Male   
2      29.0   12      M01  Male   
3      31.0   14      M01  Male   
4      21.5    8      M02  Male   
In [25]:
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

## 0. Fixed regularisation strength (lambda)
lbd = 0.001

## 1. Initialisation

# One row per individual: y_ij has shape (I, J) with J = 4 ages per subject.
y_ij = np.array(df['Distance'].values.reshape(-1, 4))
I, J = y_ij.shape          # I individuals, J measurements each (was assigned twice before)
n = I                      # NOTE(review): n = I (not I*J) divides sigma^2(0) below — confirm intended
Y = y_ij.reshape(-1, 1)    # full response as an (I*J, 1) column vector

# Fixed-effects design for one individual: intercept + age.
t_j = np.array([8, 10, 12, 14])                 # measurement ages (years)
X_i = np.column_stack((np.ones(len(t_j)), t_j))
X = np.vstack([X_i] * I)                        # stacked design, shape (I*J, 2)
N = X.shape[0]                                  # total number of observations

# Random-effects design (random intercept + random slope), same as X_i here.
Z_i = np.column_stack((np.ones(len(t_j)), t_j))

# 1.1 Initialise beta^(0) with a LASSO fit (sklearn's alpha plays the role of lambda).
'''
$$
\beta^{(0)} \leftarrow \arg\min_{\beta} \left\{ (Y - X\beta)^\top (Y - X\beta) + \lambda \|\beta\|_1 \right\}
$$
'''
lasso = Lasso(alpha=lbd, fit_intercept=False)
lasso.fit(X, Y.ravel())
beta_0 = lasso.coef_.reshape(-1, 1)             # (2, 1) column vector of fixed effects
matrix_beta = [beta_0]                          # history of beta across EM iterations

# 1.2 Initial residual variance sigma^2(0).
'''
\sigma^{2(0)} \leftarrow \frac{1}{n} (Y - X\beta^{(0)})^\top (Y - X\beta^{(0)})
'''
resid_0 = Y - X @ beta_0
sigma2_0 = (1 / n) * (resid_0.T @ resid_0)      # kept as a (1, 1) matrix — downstream code expects that
matrix_sigma2 = [sigma2_0]

# 1.3 Random effects start at zero: b^(0) stacks the I pairs (b_0i, b_1i) into (2*I, 1).
b_0 = np.zeros((2 * I, 1))
matrix_b = [b_0]

# 1.4 D: covariance matrix of the random effects b_i, initialised to the identity.
D_0 = np.eye(2)
matrix_D = [D_0]


K_max = 100  # maximum number of EM iterations (no convergence test — TODO: add a stopping criterion)
for k in range(1, K_max):

    # --- 2. E-Step ---
    '''
    \tilde{y}_i^{(k)} = y_i - Z_i b_i^{(k)}
    \Lambda_i^{(k)} = (D^{(k-1)} + Z_i^\top Z_i / \sigma^{(k-1)})^{-1}
    b_i^{(k)} = \frac{1}{\sigma^{(k-1)}} \Lambda_i^{(k)} Z_i^\top \tilde{y}_i^{(k)}
    \lambda_1^{(k)} = 2 \lambda \sigma^{(k-1)}
    '''
    
    # 2.1 Compute \tilde{y}_i^{(k)}: remove the current random-effect prediction Z_i b_i.
    # matrix_b[-1] is the (2*I, 1) stack from the previous iteration, so rows
    # 2*i : 2*i+2 hold individual i's (intercept, slope) pair.
    '''
    \tilde{y}_i^{(k)} = y_i - Z_i b_i^{(k)}
    \tilde{y}^{(k)} = [[\tilde{y}_1^{(k)}]^T, [\tilde{y}_2^{(k)}]^T, .. , [\tilde{y}_I^{(k)}]^T]
    '''
    y_tilde_i_k = [np.transpose(y_ij[i, :]).reshape(-1,1) - Z_i @ matrix_b[-1][2*i:2*i+2, :] for i in range(I)]
    # y_tilde_k = np.vstack([arr.T for arr in y_tilde_i_k])                   # alternative: transpose each array_i then stack
    y_tilde_k = np.stack(y_tilde_i_k).reshape(-1,1)  # flattened (I*J, 1) column vector
    
    # 2.2 Compute Λ_i^{(k)} — shared by all i since every individual has the same Z_i.
    # NOTE(review): the code uses the standard conditional covariance
    # Λ_i = (D^{-1} + Z_i'Z_i / σ²)^{-1}; the docstring above writes D without the
    # inverse — the code's form is the usual one, confirm against the derivation.
    '''
    \Lambda_i^{(k)} = (D^{(k-1)} + Z_i^\top Z_i / \sigma^{(k-1)})^{-1}
    '''
    Lambda_i_k = np.linalg.inv(np.linalg.inv(matrix_D[-1]) + (Z_i.T @ Z_i) / matrix_sigma2[-1])
    
    # 2.3 Compute b_i^{(k)} for each i
    '''
    b_i^{(k)} = \frac{1}{\sigma^{(k-1)}} \Lambda_i^{(k)} Z_i^\top \tilde{y}_i^{(k)}
    b^{(k)} = [[b_1^{(k)}]^T, [b_2^{(k)}]^T, ..., [b_I^{(k)}]^T]^T
    '''
    #b_i_k = [(1 / matrix_sigma2[-1]) * ( Lambda_i_k @ Z_i.T @ (y_tilde_k[i, :].reshape(-1, 1) - X_i @ matrix_beta[-1]) ) for i in range(I)]
    ## Option 1: (1/σ²) Λ_i Z_i' (residual) — algebraically equal to Option 2 below.
    ##b_i_k = [(1 / matrix_sigma2[-1]) * (Lambda_i_k @ Z_i.T @ (y_tilde_k[J*i:J*(i+1), :] - X_i @ matrix_beta[-1])) for i in range(I)]
    ## Option 2: BLUP form b_i = D Z_i' (Z_i D Z_i' + σ² I_J)^{-1} (ỹ_i - X_i β).
    # NOTE(review): the residual uses ỹ (which already had Z_i b_i removed in 2.1)
    # rather than the raw y_i — confirm this matches the reference EM derivation.
    b_i_k = [(matrix_D[-1] @ Z_i.T @ np.linalg.inv(Z_i @  matrix_D[-1] @ Z_i.T + matrix_sigma2[-1] * np.eye(J)) @ (y_tilde_k[J*i:J*(i+1), :] - X_i @ matrix_beta[-1])) for i in range(I)]
    b_k = np.vstack(b_i_k)                                                  # Stack all b_i^(k) into a single (2*I, 1) matrix
    

    # 2.4 Effective penalty for the M-step: λ1^(k) = 2 λ σ²^(k-1) (a (1, 1) array here)
    lbd1_k = 2 * lbd * matrix_sigma2[-1]

    # Store updated b
    matrix_b.append(b_k)
    
    ## --- 3. M-Step ---

    # 3.1 Compute \beta^{(k+1)}
    '''
    \beta^{(k+1)} \leftarrow \arg\min_{\beta} \left\{ (\tilde{Y}^{(k)} - X\beta)^\top (\tilde{Y}^{(k)} - X\beta) + \lambda_1^{(k)} \Psi_p^{\alpha}(\beta) \right\}
    '''

    # Elastic Net replaces the plain Lasso of the initialisation step.
    # NOTE(review): lbd1_k is a (1, 1) array passed as sklearn's scalar `alpha` —
    # consider float(lbd1_k) to avoid relying on implicit conversion.
    ratio_l1l2 = 0.5  # Elastic Net mixing ratio between L1 and L2 penalties (0 < ratio_l1l2 < 1)
    elastic_net = ElasticNet(alpha=lbd1_k, l1_ratio=ratio_l1l2, fit_intercept=False)

    # Fit the Elastic Net model on the random-effect-corrected response
    elastic_net.fit(X, y_tilde_k.ravel())

    # Obtain the updated beta^{(k+1)}
    beta_kadd1 = elastic_net.coef_.reshape(-1, 1)
    
    # 3.2 Compute \sigma^{2(k+1)}
    '''
    \sigma^{2(k+1)} \leftarrow \frac{1}{N} (\tilde{y}^{(k)} - X\beta^{(k)})^\top (\tilde{y}^{(k)} - X\beta^{(k)}) + \sum_{i=1}^{n} \text{tr}(Z_i \Lambda_i^{(k)} Z_i^\top)
    '''
    # matrix_beta[-1] is still beta^(k) here (beta^(k+1) is appended only below),
    # matching the formula. Lambda_i_k is shared, so the sum adds I equal traces.
    # NOTE(review): the trace sum sits INSIDE the 1/N factor in the code; the
    # docstring's placement is ambiguous — confirm the intended normalisation.
    sigma2_k = (1 / N) * ( ((y_tilde_k - X @ matrix_beta[-1]).T @ (y_tilde_k - X @ matrix_beta[-1])) + sum(np.trace(Z_i @ Lambda_i_k @ Z_i.T) for i in range(I)) )
    ##toto = (1 / N) * ( ((y_tilde_k - X @ matrix_beta[-1]).T @ (y_tilde_k - X @ matrix_beta[-1])) + np.sum(np.trace(Z_i @ Lambda_i_k @ Z_i.T))*I )
    
    # 3.3 Compute D^{(k+1)}
    '''
    D^{(k+1)} \leftarrow \frac{1}{n} \sum_{i=1}^{n} \left( b_i^{(k)} b_i^{(k)T} + \Lambda_i^{(k)} \right)
    '''
    
    #D_k = (1 / I) * sum(b_i_k[i] @ b_i_k[i].T + Lambda_i_k for i in range(I))
    D_k = (1 / I) * np.sum([b_i_k[i] @ b_i_k[i].T + Lambda_i_k for i in range(I)], axis=0)
        
    # Store the updated beta
    matrix_beta.append(beta_kadd1)
    # Store updated sigma^2
    matrix_sigma2.append(sigma2_k)
    # Store updated D
    matrix_D.append(D_k)
In [26]:
# Final EM estimates: fixed effects beta, random-effect covariance D, residual variance sigma^2
for estimate in (matrix_beta[-1], matrix_D[-1], matrix_sigma2[-1]):
    print(estimate)
[[15.06165971]
 [ 0.80693224]]
[[ 0.58343463 -0.01490246]
 [-0.01490246  0.00672141]]
[[4.24344641]]

Fit ${y_{fit ~ ij} }$ Way 1¶

In [27]:
from scipy.linalg import block_diag

# Way 1: fit all individuals at once with the full mixed-model matrices.
# U: block-diagonal random-effects design — one X_i (= Z_i) block per individual.
U = block_diag(*[X_i for _ in range(I)])  
I_J = np.diag(np.ones(N))  # N = I * J = 27 * 4 = 108 identity for the residual part

# B = [U | I_N] maps the stacked vector (b, eps) onto the observations.
B = np.hstack([U, I_J])

D_estimate = matrix_D[-1]  # estimated covariance of one individual's random effects

# G: block-diagonal covariance of ALL random effects (I copies of D)
G_estimate = block_diag(*[D_estimate for _ in range(I)])  

sigma2_estimate = matrix_sigma2[-1][0][0]   # scalar residual variance (unwrap the 1x1 matrix)
R_estimate = np.eye(N,N) * sigma2_estimate  # residual covariance sigma^2 * I_N

# W = Cov(b, eps); V = B W B' = U G U' + R is the marginal covariance of Y.
W = block_diag(G_estimate, R_estimate)

V = B @ W @ B.T
# GLS re-estimate of beta (left commented out — the EM estimate is used instead):
# beta_estimate = np.linalg.inv(X.T @ np.linalg.inv(V) @ X ) @ X.T @ np.linalg.inv(V) @ Y
beta_estimate = matrix_beta[-1]

# Fitted values: X beta + U b_hat.
# NOTE(review): the standard BLUP is b_hat = + G U' V^{-1} (Y - X beta); the leading
# minus sign here flips it — confirm the sign convention used for b in the EM step.
Y_fit = X @ beta_estimate + U @ (- G_estimate @ U.T @ np.linalg.inv(V) @ (Y - X @ beta_estimate))
y_fit_ij = Y_fit.reshape(-1, 4)  # back to one row of J fitted values per individual
print("y_ij = \n", y_ij)
print("y_fit_ij = \n", y_fit_ij)
y_ij = 
 [[26.  25.  29.  31. ]
 [21.5 22.5 23.  26.5]
 [23.  22.5 24.  27.5]
 [25.5 27.5 26.5 27. ]
 [20.  23.5 22.5 26. ]
 [24.5 25.5 27.  28.5]
 [22.  22.  24.5 26.5]
 [24.  21.5 24.5 25.5]
 [23.  20.5 31.  26. ]
 [27.5 28.  31.  31.5]
 [23.  23.  23.5 25. ]
 [21.5 23.5 24.  28. ]
 [17.  24.5 26.  29.5]
 [22.5 25.5 25.5 26. ]
 [23.  24.5 26.  30. ]
 [22.  21.5 23.5 25. ]
 [21.  20.  21.5 23. ]
 [21.  21.5 24.  25.5]
 [20.5 24.  24.5 26. ]
 [23.5 24.5 25.  26.5]
 [21.5 23.  22.5 23.5]
 [20.  21.  21.  22.5]
 [21.5 22.5 23.  25. ]
 [23.  23.  23.5 24. ]
 [20.  21.  22.  21.5]
 [16.5 19.  19.  19.5]
 [24.5 25.  28.  28. ]]
y_fit_ij = 
 [[19.91846679 21.31924968 22.72003258 24.12081548]
 [21.75388733 23.39972249 25.04555766 26.69139283]
 [21.39071108 22.9903445  24.58997792 26.18961134]
 [20.43797776 21.93594667 23.43391558 24.93188449]
 [21.90534729 23.56797388 25.23060047 26.89322706]
 [20.50964208 21.99773999 23.4858379  24.97393581]
 [21.59591426 23.22027683 24.84463939 26.46900196]
 [21.57148003 23.20897016 24.84646028 26.48395041]
 [21.01125207 22.5529808  24.09470954 25.63643827]
 [19.20188396 20.51728512 21.83268628 23.14808745]
 [21.67896645 23.33233203 24.98569761 26.63906319]
 [21.37442832 22.9623588  24.55028928 26.13821976]
 [21.31255379 22.85601312 24.39947244 25.94293177]
 [21.14479092 22.72111981 24.29744869 25.87377758]
 [20.68879282 22.18289518 23.67699755 25.17109991]
 [21.92488662 23.60155673 25.27822684 26.95489695]
 [22.61377866 24.38661748 26.1594563  27.93229512]
 [21.90860385 23.57357102 25.2385382  26.90350537]
 [21.5926577  23.21467968 24.83670167 26.45872365]
 [21.14804747 22.72671695 24.30538642 25.88405589]
 [22.09914246 23.8089881  25.51883375 27.22867939]
 [22.71800853 24.50438221 26.2907559  28.07712959]
 [21.92488662 23.60155673 25.27822684 26.95489695]
 [21.79296598 23.46688819 25.1408104  26.81473261]
 [22.72452163 24.5155765  26.30663136 28.09768622]
 [23.80753726 25.73251619 27.65749513 29.58247406]
 [20.50964208 21.99773999 23.4858379  24.97393581]]

Fit ${y_{fit ~ ij} }$ Way 2¶

$$ \mathbf{y}_i = \begin{bmatrix} y_{i1} \\ \vdots \\ y_{iJ} \end{bmatrix} = A \begin{bmatrix} \beta_0 \\ \beta_1 \end{bmatrix} + A \begin{bmatrix} \beta_{0r, i} \\ \beta_{1r, i} \end{bmatrix} + \begin{bmatrix} \epsilon_{i1} \\ \vdots \\ \epsilon_{iJ} \end{bmatrix} $$

with

$$ A = \begin{bmatrix} 1 & 8 \\ 1 & 10 \\ 1 & 12 \\ 1 & 14 \\ \end{bmatrix} $$

after EM we get $$ \hat{\sigma}_{0r}^2, ~ \hat{\sigma}_{1r}^2, ~ \hat{\rho}_{0r, 1r}, ~ \hat{\sigma}^2 $$

if we compute, $\forall i \in [1, 2, \ldots, I]$ $$ B = \begin{bmatrix} A & \mathbb{I}_{J\times J} \end{bmatrix} = \begin{bmatrix} 1 & 8 & 1 & 0 & 0 & 0\\ 1 & 10 & 0 & 1 & 0 & 0\\ 1 & 12 & 0 & 0 & 1 & 0 \\ 1 & 14 & 0 & 0 & 0 & 1 \\ \end{bmatrix} $$ $$ \hat{G_i} = \hat{D} = \begin{bmatrix} \hat{\sigma}_{0r}^2 & \hat{\rho}_{0r, 1r} \\ \hat{\rho}_{0r, 1r} & \hat{\sigma}_{1r}^2 \end{bmatrix} $$

$$ \hat{R_i} = \hat{\sigma}^2 \mathbb{I}_{J \times J} $$

$$ \hat{W_i} = \begin{bmatrix} \hat{G_i} & \mathbf{0} \\ \mathbf{0} & \hat{R_i} \end{bmatrix} $$

$$ \hat{V}_i = B \hat{W}_i B^T $$

Consider $$ \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} = (A^T {\hat{V}_i}^{-1} A)^{-1} A^{T} {\hat{V}_i}^{-1} ~ \mathbf{y}_i $$

in our case $$ \hat{V}_i \simeq k \mathbb{I}_{J\times J} $$

thus

$$ \begin{align*} \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} &\simeq (A^T ~ \frac{1}{k} \mathbb{I}_{J\times J} ~ A)^{-1} A^{T} ~ \frac{1}{k} \mathbb{I}_{J\times J} ~ \mathbf{y}_i \\ &= (A^T A)^{-1} A^{T} ~ \mathbf{y}_i \\ &= A^{+} \mathbf{y}_i \end{align*} $$

where $(\cdot)^{+}$ is the Moore-Penrose Pseudoinverse

Consider $$ \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} = - \hat{G_i} A^T \hat{V}_i^{-1} (\mathbf{y}_i - A \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} ) $$

in our case $$ \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} \simeq \begin{bmatrix} 0 \\ 0 \end{bmatrix} $$

$$ \begin{align*} \Rightarrow \mathbf{y}_{fit ~ i} &= A \begin{bmatrix} \hat{\beta}_{0, i} \\ \hat{\beta}_{1, i} \end{bmatrix} + A \begin{bmatrix} \hat{\beta}_{0r, i} \\ \hat{\beta}_{1r, i} \end{bmatrix} \\ &\simeq A A^{+} \mathbf{y}_i \end{align*} $$

In [28]:
A = X_i
I_J = np.diag([1, 1, 1, 1])             # J = 4: identity block for the residual part of B

# Way 2: per-individual fit. B = [A | I_J], W = blkdiag(D_hat, sigma^2 I_J),
# so V = B W B' = A D A' + sigma^2 I_J is ONE individual's marginal covariance.
B = np.hstack([A, I_J])

G_estimate = matrix_D[-1]
sigma2_estimate = matrix_sigma2[-1][0][0]
R_estimate = np.eye(J,J) * sigma2_estimate
W = np.block([
    [G_estimate, np.zeros((2, J))],           # [ G   0 ]
    [np.zeros((J, 2)), R_estimate]            # [ 0   R ]
])
V = B @ W @ B.T

# PERF: V is identical for every individual — invert it ONCE instead of
# recomputing np.linalg.inv(V) in every expression and loop iteration.
V_inv = np.linalg.inv(V)
# GLS estimator matrix: beta_i = H @ y_i for each individual's response vector.
H = np.linalg.inv(A.T @ V_inv @ A) @ A.T @ V_inv

print(V_inv * 10)
print(np.linalg.inv(A.T @ A) @ A.T)   # OLS projector A^+ — numerically equal to H because V ~ k*I here
print(H)

def _fit_one(y_col):
    """Fit one individual: return (beta_i, b_i, fitted column vector).

    y_col : (J, 1) column of one individual's measurements.
    NOTE(review): the standard BLUP is +G A' V^{-1}(y - A beta); the minus sign
    is kept from the original code (the term is ~0 here anyway, see note below).
    """
    beta_i = H @ y_col
    b_i = - G_estimate @ A.T @ V_inv @ (y_col - A @ beta_i)
    return beta_i, b_i, A @ beta_i + A @ b_i

'''begin test : for one line in y_ij, with line in [1, I]'''
line = 27
y_line = y_ij[line-1,:].reshape(-1,1)
beta_estimate_line, b_line, y_fit_line = _fit_one(y_line)
print("here", b_line)
print(f"y_{line} = \n ", y_line)
print(f"y_fit_{line} = \n ", y_fit_line)
'''end test'''

# Fit every individual and stack the J fitted values back into an (I, J) array.
y_fit_ij = np.vstack([_fit_one(y_ij[i,:].reshape(-1,1))[2].T for i in range(I)])

print("y_ij = \n " , y_ij)
print("y_fit_ij = \n " , y_fit_ij)

'''
problem is that 
print(A @ (- G_estimate @ A.T @ np.linalg.inv(V) @ (y_ij[line-1,:].reshape(-1,1) - A @ beta_estimate_line)))
is around 0 !
it means
A @ (np.linalg.inv(A.T @ np.linalg.inv(V) @ A ) @ A.T @ np.linalg.inv(V) @ y_ij[line-1,:].reshape(-1,1))
already gives the fit result for individuel = line
'''
[[ 2.13397251 -0.23795118 -0.25329981 -0.26864844]
 [-0.23795118  2.09224372 -0.29071151 -0.31709168]
 [-0.25329981 -0.29071151  2.02845185 -0.36553492]
 [-0.26864844 -0.31709168 -0.36553492  1.9425969 ]]
[[ 1.9   0.8  -0.3  -1.4 ]
 [-0.15 -0.05  0.05  0.15]]
[[ 1.9   0.8  -0.3  -1.4 ]
 [-0.15 -0.05  0.05  0.15]]
here [[-3.51108032e-15]
 [-7.28583860e-16]]
y_27 = 
  [[24.5]
 [25. ]
 [28. ]
 [28. ]]
y_fit_27 = 
  [[24.35]
 [25.7 ]
 [27.05]
 [28.4 ]]
y_ij = 
  [[26.  25.  29.  31. ]
 [21.5 22.5 23.  26.5]
 [23.  22.5 24.  27.5]
 [25.5 27.5 26.5 27. ]
 [20.  23.5 22.5 26. ]
 [24.5 25.5 27.  28.5]
 [22.  22.  24.5 26.5]
 [24.  21.5 24.5 25.5]
 [23.  20.5 31.  26. ]
 [27.5 28.  31.  31.5]
 [23.  23.  23.5 25. ]
 [21.5 23.5 24.  28. ]
 [17.  24.5 26.  29.5]
 [22.5 25.5 25.5 26. ]
 [23.  24.5 26.  30. ]
 [22.  21.5 23.5 25. ]
 [21.  20.  21.5 23. ]
 [21.  21.5 24.  25.5]
 [20.5 24.  24.5 26. ]
 [23.5 24.5 25.  26.5]
 [21.5 23.  22.5 23.5]
 [20.  21.  21.  22.5]
 [21.5 22.5 23.  25. ]
 [23.  23.  23.5 24. ]
 [20.  21.  22.  21.5]
 [16.5 19.  19.  19.5]
 [24.5 25.  28.  28. ]]
y_fit_ij = 
  [[24.9  26.8  28.7  30.6 ]
 [21.05 22.6  24.15 25.7 ]
 [22.   23.5  25.   26.5 ]
 [26.1  26.45 26.8  27.15]
 [20.45 22.15 23.85 25.55]
 [24.35 25.7  27.05 28.4 ]
 [21.35 22.95 24.55 26.15]
 [22.75 23.5  24.25 25.  ]
 [22.2  24.15 26.1  28.05]
 [27.25 28.75 30.25 31.75]
 [22.65 23.3  23.95 24.6 ]
 [21.25 23.25 25.25 27.25]
 [18.4  22.3  26.2  30.1 ]
 [23.3  24.35 25.4  26.45]
 [22.5  24.75 27.   29.25]
 [21.35 22.45 23.55 24.65]
 [20.25 21.   21.75 22.5 ]
 [20.6  22.2  23.8  25.4 ]
 [21.2  22.9  24.6  26.3 ]
 [23.45 24.4  25.35 26.3 ]
 [21.8  22.35 22.9  23.45]
 [20.   20.75 21.5  22.25]
 [21.35 22.45 23.55 24.65]
 [22.85 23.2  23.55 23.9 ]
 [20.3  20.85 21.4  21.95]
 [17.15 18.05 18.95 19.85]
 [24.35 25.7  27.05 28.4 ]]
Out[28]:
'\nproblem is that \nprint(A @ (- G_estimate @ A.T @ np.linalg.inv(V) @ (y_ij[line-1,:].reshape(-1,1) - A @ beta_estimate_line)))\nis around 0 !\nit means\nA @ (np.linalg.inv(A.T @ np.linalg.inv(V) @ A ) @ A.T @ np.linalg.inv(V) @ y_ij[line-1,:].reshape(-1,1))\nalready gives the fit result for individuel = line\n'
In [32]:
import numpy as np
import matplotlib.pyplot as plt

# Subject labels: the data has 4 rows per individual, so take one label per group.
Individual = df['Individu'][::4].values
Sex = df['Sex'][::4].values

t_j = np.array([8, 10, 12, 14])  # Time points (Age)

(I, J) = np.shape(y_ij)

# Set parameter: plot every 'm'-th individual
m = 1
selected_indices = np.arange(0, I, m)

# Number of selected individuals
num_selected = len(selected_indices)

# Create a grid of subplots with ncols_ columns.
ncols_ = 4
# BUG FIX: the row count must use ceiling division. The previous
# (num_selected + 1) // ncols_ under-allocates axes whenever
# num_selected % ncols_ > 1 (e.g. 29 selected -> 7 rows = 28 axes -> IndexError).
nrows_ = (num_selected + ncols_ - 1) // ncols_
fig, axes = plt.subplots(nrows=nrows_, ncols=ncols_, figsize=(16, 4 * nrows_))
axes = np.atleast_1d(axes).flatten()  # uniform 1-D indexing even for a single row

for idx, i in enumerate(selected_indices):
    ax = axes[idx]  # Select subplot

    # Extract individual label
    label = f'{Individual[i]} ({Sex[i]})'

    # Observed measurements as scatter points
    ax.scatter(t_j, y_ij[i], color='red', label=f'{label} Observed', marker='o')

    # Fitted values as a line
    ax.plot(t_j, y_fit_ij[i], color='blue', label=f'{label} Fitted', linestyle='-', marker='s')

    # Labels and title
    ax.set_xlabel('t_j (Age)')
    ax.set_ylabel('Distance')
    ax.set_title(f'Individual: {label}')
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.7)

# Hide any leftover (unused) axes in the grid
for ax in axes[num_selected:]:
    ax.set_visible(False)

# Adjust layout
plt.tight_layout()
plt.show()