This notebook contains material from cbe67701-uncertainty-quantification; the content is available on GitHub.
Created by Ben Whewell (bwhewell@nd.edu)
This example was adapted from:
McClarren, Ryan G (2018). Uncertainty Quantification and Predictive Computational Science: A Foundation for Physical Scientists and Engineers, Chapter 4: Local Sensitivity Analysis Based on Derivative Approximations, Springer, https://doi.org/10.1007/978-3-319-99525-0_4
## import all needed Python libraries here
import numpy as np
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
# Functions to be used later
np.random.seed(47)

# Train/test split; `size` is the fraction of samples held out for testing
def tt_split(x,y,size):
    inds = np.arange(len(x))
    np.random.shuffle(inds)
    split = int(np.ceil(len(x)*size))
    # returns x_train, x_test, y_train, y_test
    return x[inds[split:]],x[inds[:split]],y[inds[split:]],y[inds[:split]]

# Mean squared error
def mse(y,y_hat):
    return np.mean((y-y_hat)**2)
Consider a quantity of interest (QoI) $Q(\mathbf{x})$, where $\mathbf{x} = (x_1, \ldots, x_J)^T$ is a vector of $J$ parameters. Given $I$ model evaluations, we can write $I$ equations that relate the known values $Q_i = Q(\mathbf{x}_i)$ to the unknown local sensitivities:
$X_{ij} = x_{ij} - \bar{x}_j, \quad \mathbf{y} = \begin{pmatrix} Q_1 - Q(\mathbf{\bar{x}}) \\ Q_2 - Q(\mathbf{\bar{x}}) \\ \vdots \\ Q_I - Q(\mathbf{\bar{x}}) \end{pmatrix}, \quad \beta = \begin{pmatrix} \left.\frac{\partial Q}{\partial x_1}\right|_\mathbf{\bar{x}} \\ \left.\frac{\partial Q}{\partial x_2}\right|_\mathbf{\bar{x}} \\ \vdots \\ \left.\frac{\partial Q}{\partial x_J}\right|_\mathbf{\bar{x}} \end{pmatrix}$
This can be rearranged into the linear system $\mathbf{X\beta} = \mathbf{y}$. When $I > J$ the system is overdetermined and can be solved as a least-squares problem, with $\hat{\beta}_{LS} = \left(\mathbf{X}^T\mathbf{X} \right)^{-1}\mathbf{X}^T\mathbf{y}$.
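As a small illustration (not from the original reference), the least-squares sensitivities can be computed directly from the normal equations on hypothetical data; the names `X_demo`, `y_demo`, and `beta_true` are made up for this sketch, and `np.linalg.lstsq` gives the same answer more stably than the explicit inverse.
# Sketch: least-squares sensitivities from the normal equations (hypothetical data, I > J)
rng = np.random.default_rng(0)
I_runs, J_params = 50, 4
X_demo = rng.normal(size=(I_runs, J_params))            # centered parameter perturbations
beta_true = np.array([2.0, -1.0, 0.5, 0.0])             # "true" local sensitivities
y_demo = X_demo @ beta_true + 0.01*rng.normal(size=I_runs)
beta_ls = np.linalg.inv(X_demo.T @ X_demo) @ X_demo.T @ y_demo
beta_lstsq, *_ = np.linalg.lstsq(X_demo, y_demo, rcond=None)  # numerically preferred route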
Dividing each perturbation by its nominal value $\bar{x}_j$ makes the design matrix dimensionless, so the regression returns scaled sensitivities: $X_{ij} = \frac{x_{ij} - \bar{x}_j}{\bar{x}_j}, \quad \beta = \begin{pmatrix} \bar{x}_1\left.\frac{\partial Q}{\partial x_1}\right|_\mathbf{\bar{x}} \\ \bar{x}_2\left.\frac{\partial Q}{\partial x_2}\right|_\mathbf{\bar{x}} \\ \vdots \\ \bar{x}_J\left.\frac{\partial Q}{\partial x_J}\right|_\mathbf{\bar{x}} \end{pmatrix}$
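A minimal sketch of the scaled form on hypothetical data (the QoI `Q_demo`, the nominal values `x_bar`, and the perturbation size are all invented for illustration):
# Sketch: scaled sensitivities x_bar_j * dQ/dx_j from relative perturbations
def Q_demo(x):                                           # hypothetical linear QoI, dQ/dx = (3, -1, 4)
    return 3.0*x[..., 0] - 1.0*x[..., 1] + 4.0*x[..., 2]
rng = np.random.default_rng(1)
x_bar = np.array([10.0, 2.0, 0.5])                       # nominal parameter values
x_runs = x_bar*(1 + 0.05*rng.normal(size=(40, 3)))       # runs perturbed by ~5%
X_rel = (x_runs - x_bar)/x_bar                           # X_ij = (x_ij - x_bar_j)/x_bar_j
y_rel = Q_demo(x_runs) - Q_demo(x_bar)                   # Q_i - Q(x_bar)
beta_scaled, *_ = np.linalg.lstsq(X_rel, y_rel, rcond=None)
# beta_scaled should be close to x_bar*(3, -1, 4) = (30, -2, 2)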
Ridge regression adds an $\ell_2$ penalty to the least-squares minimization problem, which shrinks the magnitude of the resulting $\beta$ vector. \begin{equation} \hat{\beta}_{\text{ridge}} = \arg\min_{\beta} \sum_{i=1}^{I} \left(y_i - \beta \cdot \mathbf{x}_i\right)^2 + \lambda ||\beta||_2^2, \qquad ||\beta||_2 = \left(\sum_{j=1}^{J}|\beta_j|^2\right)^{1/2} \end{equation}
This minimization has the closed-form solution \begin{equation} \hat{\beta}_{\text{ridge}} = \left(\mathbf{X}^T\mathbf{X} +\lambda \mathbf{I} \right)^{-1}\mathbf{X}^T\mathbf{y} \end{equation}
# Ridge regression (closed-form solution above)
def ridge_model(x_train,y_train,Lambda,x_test,coef_=False):
    beta = np.linalg.inv(x_train.T @ x_train + Lambda*np.eye(x_train.shape[1])) @ x_train.T @ y_train
    if coef_:
        return beta          # return the fitted coefficients
    return x_test @ beta     # otherwise return predictions on x_test
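As a quick, hedged check of `ridge_model` on synthetic data (the names `Xd` and `yd` are made up here), the coefficient norm should shrink as $\lambda$ grows:
# Sketch: larger Lambda shrinks the ridge coefficients toward zero
rng = np.random.default_rng(2)
Xd = rng.normal(size=(30, 5))
yd = Xd @ np.array([1.0, -2.0, 0.0, 0.5, 3.0]) + 0.1*rng.normal(size=30)
for lam in [0.0, 1.0, 100.0]:
    b = ridge_model(Xd, yd, lam, Xd, coef_=True)
    print(lam, np.linalg.norm(b))    # norm decreases as lam increases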
$\lambda$ is a hyperparameter that needs to be optimized through cross validation.
# Cross validation
def cross_validation(x,y,Lambda,k_folds):
    # Error matrix: one row per fold, one column per candidate lambda
    error = np.zeros((k_folds,len(Lambda)))
    for ii in range(k_folds):
        # Resample the training and testing data (random splits, not disjoint folds)
        x_train,x_test,y_train,y_test = tt_split(x,y,1/k_folds)
        for jj in range(len(Lambda)):
            # Fit the ridge model for each lambda value
            y_hat = ridge_model(x_train,y_train,Lambda[jj],x_test)
            # Calculate MSE on the held-out data
            error[ii,jj] = mse(y_test,y_hat)
    return best_lambda(error,Lambda)
# Calculating the optimal lambda for ridge regression
def best_lambda(error,Lambda):
    # Largest lambda whose mean MSE is within one standard error of the minimum mean MSE
    means = np.mean(error,axis=0)
    bound = np.min(means) + np.std(error)/np.sqrt(error.size)   # error.size = k_folds*len(Lambda)
    best = Lambda[0]   # initialize
    for ii in range(len(Lambda)):
        if (bound - means[ii]) > 0 and Lambda[ii] > best:
            best = Lambda[ii]
    # return Lambda[np.argmin(means)]   # alternative: lambda with the minimum mean MSE
    return best
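A small, hedged usage example of the two helpers above on synthetic data (`Xd` and `yd` are hypothetical); the selected value is the largest $\lambda$ still within one standard error of the best mean MSE:
# Sketch: selecting lambda with the one-standard-error rule on hypothetical data
rng = np.random.default_rng(3)
Xd = rng.normal(size=(60, 4))
yd = Xd @ np.array([1.0, 0.0, -2.0, 0.5]) + 0.2*rng.normal(size=60)
print(cross_validation(Xd, yd, [0.01, 0.1, 1.0, 10.0], k_folds=5))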
np.random.seed(47)
# I > J (Least Squares Problem, I = 506, J = 13)
X,Y = load_boston(return_X_y=True)
# Normalize Data - Dimensionless
Y = Y - np.mean(Y)
X = (X - np.mean(X,axis=0))/np.mean(X,axis=0)
# Search different lambdas
Lambdas = [0.01,0.1,0.15,0.2,0.5,1,10,20]
k_folds = len(X) # Leave one out validation
# Calculating Optimal Lambda
param_ = cross_validation(X,Y,Lambdas,k_folds=k_folds)
# Train/Test split (20% Testing)
x_train,x_test,y_train,y_test = tt_split(X,Y,0.2)
# Calculate coefficients in Ridge Regression
beta_1 = ridge_model(x_train,y_train,param_,x_test,coef_=True)
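As a hedged sanity check (not part of the original example), the ridge solution shrinks the plain least-squares sensitivities; `beta_ls_full` is a name introduced here to compare how much regularization the selected $\lambda$ applies:
# Sketch: compare the norm of the ridge coefficients to the ordinary least-squares solution
beta_ls_full = np.linalg.inv(x_train.T @ x_train) @ x_train.T @ y_train
print(np.linalg.norm(beta_ls_full), np.linalg.norm(beta_1))   # ridge norm is smaller or equal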
np.random.seed(47)
# I < J (Estimating Local Sensitivities, I = 10, J = 13)
X,Y = load_boston(return_X_y=True)
# Randomly choose 10 distinct simulations
inds = np.random.choice(len(X),10,replace=False)
x = X[inds]; y = Y[inds]
# Normalize Data - Dimensionless
y = y-np.mean(y)
x = (x - np.mean(x,axis=0))/np.mean(x,axis=0)
# Search different lambdas
Lambdas = [0.01,0.1,0.15,0.2,0.5,1,10,20]
k_folds = len(x) # Leave one out validation
# Calculating optimal lambda on the reduced data set
param_ = cross_validation(x,y,Lambdas,k_folds=k_folds)
# Train/Test split (20% Testing)
x_train,x_test,y_train,y_test = tt_split(x,y,0.2)
# Calculate coefficients in Ridge Regression
beta_2 = ridge_model(x_train,y_train,param_,x_test,coef_=True)
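With only 8 training runs and 13 parameters, $\mathbf{X}^T\mathbf{X}$ is rank-deficient and ordinary least squares breaks down; a quick hedged check of the conditioning shows why the $\lambda\mathbf{I}$ term is essential in this case:
# Sketch: X^T X is effectively singular when I < J, but adding lambda*I fixes the conditioning
print(np.linalg.cond(x_train.T @ x_train))                                    # enormous (or inf): rank-deficient
print(np.linalg.cond(x_train.T @ x_train + param_*np.eye(x_train.shape[1])))  # lambda*I bounds the smallest eigenvalue away from zero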
plt.figure(figsize=(10,8))
plt.plot(beta_2,label='Reduced Simulations (I = 10)',c='b',alpha=0.75,lw=3)
plt.plot(beta_1,label='Full Simulations (I = 506)',c='r',alpha=0.75,lw=3)
plt.grid(); plt.legend(loc='best',prop={'size':16})
plt.tick_params(axis='both', which='major', labelsize=16)
plt.ylabel('Coefficient',fontsize=20); plt.xlabel('Parameter Number',fontsize=20)
plt.show()
fig = plt.figure(figsize=(20,8))
plt.subplot(1, 2, 1)
plt.plot(beta_2,c='b',alpha=0.75,lw=3)
plt.title('Reduced Simulations (I = 10)',fontsize=20); plt.grid()
plt.tick_params(axis='both', which='major', labelsize=16)
plt.ylabel('Coefficient',fontsize=18); plt.xlabel('Parameter Number',fontsize=18)
plt.subplot(1, 2, 2)
plt.plot(beta_1,c='r',alpha=0.75,lw=3)
plt.title('Full Simulations (I = 506)',fontsize=20); plt.grid()
plt.tick_params(axis='both', which='major', labelsize=16)
plt.ylabel('Coefficient',fontsize=18); plt.xlabel('Parameter Number',fontsize=18)
plt.show()
# Comparison: scikit-learn's RidgeCV selects lambda (alpha) by cross validation on the raw data
X,Y = load_boston(return_X_y=True)
Lambdas = [0.01,0.1,0.15,0.2,0.5,1,10,20]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state=5)
k_folds = 25
ridge = RidgeCV(alphas=Lambdas,cv=k_folds,normalize=False)
regression = ridge.fit(x_train,y_train)
y_hat = regression.predict(x_test)
beta_sklearn = regression.coef_
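A hedged sanity check (the names `alpha_best`, `ridge_check`, and `beta_closed` are introduced only for this comparison): with the intercept disabled, scikit-learn's `Ridge` solves the same problem as the closed form in `ridge_model`, so the two coefficient vectors should agree for the same penalty.
# Sketch: verify the hand-written closed form against sklearn's Ridge (no intercept)
from sklearn.linear_model import Ridge
alpha_best = regression.alpha_                        # lambda selected by RidgeCV above
ridge_check = Ridge(alpha=alpha_best, fit_intercept=False).fit(x_train, y_train)
beta_closed = ridge_model(x_train, y_train, alpha_best, x_test, coef_=True)
print(np.max(np.abs(ridge_check.coef_ - beta_closed)))   # should be very small: both solve the same linear system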