{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "*This notebook contains material from [cbe67701-uncertainty-quantification](https://ndcbe.github.io/cbe67701-uncertainty-quantification);\n", "content is available [on Github](https://github.com/ndcbe/cbe67701-uncertainty-quantification.git).*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "< [3.0 Input Parameter Distributions](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.00-Input-Parameter-Distributions.html) | [Contents](toc.html) | [3.2 Principal Component Analysis](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.02-Contributed-Example.html)
"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "VMGtqa5Yxtzu",
"nbpages": {
"level": 1,
"link": "[3.1 Copulas ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1-Copulas)",
"section": "3.1 Copulas "
}
},
"source": [
"# 3.1 Copulas "
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "62S8Y1Txxtzv",
"nbpages": {
"level": 1,
"link": "[3.1 Copulas ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1-Copulas)",
"section": "3.1 Copulas "
}
},
"source": [
"Created by Krishnendu Mukherjee (kmukherj@nd.edu)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"colab_type": "code",
"id": "lc1_WU9Txtz3",
"nbpages": {
"level": 1,
"link": "[3.1 Copulas ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1-Copulas)",
"section": "3.1 Copulas "
},
"outputId": "1deec55c-d001-41c7-9736-bef32b58ea2a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: copulas in /anaconda3/lib/python3.7/site-packages (0.3.0)\n",
"Requirement already satisfied: scipy<1.3,>=1.2 in /anaconda3/lib/python3.7/site-packages (from copulas) (1.2.1)\n",
"Requirement already satisfied: pandas<0.25,>=0.22.0 in /anaconda3/lib/python3.7/site-packages (from copulas) (0.24.2)\n",
"Requirement already satisfied: docutils<0.15,>=0.10 in /anaconda3/lib/python3.7/site-packages (from copulas) (0.14)\n",
"Requirement already satisfied: numpy<1.17,>=1.13.1 in /anaconda3/lib/python3.7/site-packages (from copulas) (1.16.2)\n",
"Requirement already satisfied: exrex<0.11,>=0.10.5 in /anaconda3/lib/python3.7/site-packages (from copulas) (0.10.5)\n",
"Requirement already satisfied: boto3<1.10,>=1.7.47 in /anaconda3/lib/python3.7/site-packages (from copulas) (1.9.253)\n",
"Requirement already satisfied: matplotlib<4,>=2.2.2 in /anaconda3/lib/python3.7/site-packages (from copulas) (3.0.3)\n",
"Requirement already satisfied: pytz>=2011k in /anaconda3/lib/python3.7/site-packages (from pandas<0.25,>=0.22.0->copulas) (2018.9)\n",
"Requirement already satisfied: python-dateutil>=2.5.0 in /anaconda3/lib/python3.7/site-packages (from pandas<0.25,>=0.22.0->copulas) (2.8.0)\n",
"Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /anaconda3/lib/python3.7/site-packages (from boto3<1.10,>=1.7.47->copulas) (0.2.1)\n",
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /anaconda3/lib/python3.7/site-packages (from boto3<1.10,>=1.7.47->copulas) (0.10.0)\n",
"Requirement already satisfied: botocore<1.13.0,>=1.12.253 in /anaconda3/lib/python3.7/site-packages (from boto3<1.10,>=1.7.47->copulas) (1.12.253)\n",
"Requirement already satisfied: cycler>=0.10 in /anaconda3/lib/python3.7/site-packages (from matplotlib<4,>=2.2.2->copulas) (0.10.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /anaconda3/lib/python3.7/site-packages (from matplotlib<4,>=2.2.2->copulas) (1.0.1)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /anaconda3/lib/python3.7/site-packages (from matplotlib<4,>=2.2.2->copulas) (2.3.1)\n",
"Requirement already satisfied: six>=1.5 in /anaconda3/lib/python3.7/site-packages (from python-dateutil>=2.5.0->pandas<0.25,>=0.22.0->copulas) (1.12.0)\n",
"Requirement already satisfied: urllib3<1.26,>=1.20; python_version >= \"3.4\" in /anaconda3/lib/python3.7/site-packages (from botocore<1.13.0,>=1.12.253->boto3<1.10,>=1.7.47->copulas) (1.24.1)\n",
"Requirement already satisfied: setuptools in /anaconda3/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib<4,>=2.2.2->copulas) (40.8.0)\n"
]
}
],
"source": [
"## installing copula library via pip\n",
"!pip install copulas"
]
},
{
"cell_type": "markdown",
"metadata": {
"nbpages": {
"level": 1,
"link": "[3.1 Copulas ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1-Copulas)",
"section": "3.1 Copulas "
}
},
"source": [
"**Warning**: If you run this notebook on Colab, you must select **Runtime --> Restart and Run All**. This is because the package `copulas` uses a different version of `pandas` and other packages already available on Colab."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "oqhFA5S-yp2R",
"nbpages": {
"level": 1,
"link": "[3.1 Copulas ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1-Copulas)",
"section": "3.1 Copulas "
}
},
"outputs": [],
"source": [
"## import all needed Python libraries here\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy import stats\n",
"import random\n",
"from copulas import random_seed\n",
"import matplotlib.pyplot as plt\n",
"# essential function for generating Normal copula and visualization\n",
"from copulas.multivariate import GaussianMultivariate\n",
"from copulas.visualization import compare_3d\n",
"from mpl_toolkits import mplot3d"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "Y7gRcSFD1mbI",
"nbpages": {
"level": 2,
"link": "[3.1.1 Definition ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1.1-Definition)",
"section": "3.1.1 Definition "
}
},
"source": [
"## 3.1.1 Definition ##\n",
"A copula \"links\" the marginal cumulative distribution functions (CDFs) of any number of input parameters to their joint distribution.\n",
"\n",
" $$ F_{XY}(x, y) = C(F_X (x), F_Y (y)) $$\n",
"\n",
"Here, $C$ is the copula function, which takes the marginal CDF of each variable and returns the joint CDF. \n",
"\n",
"\n",
"* Very useful in finance and insurance industries for modeling risk distribution.\n",
"* They allow one to easily model and estimate the distribution of random vectors by estimating marginals and copulae separately.\n",
"\n",
"\n",
"## Normal Copula ##\n",
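"One of the simplest copulas is the Normal (Gaussian) copula:\n",
"\n",
"$$ C_N (u,v) = \\Phi_R (\\Phi^{-1}(u), \\Phi^{-1}(v)) $$\n",
"\n",
"where $u$ and $v$ are the marginal CDF values of the random variables (each in $[0, 1]$), $\\Phi^{-1}$ is the inverse CDF of the standard normal distribution, $\\Phi_R$ is the joint CDF of a zero-mean multivariate normal distribution with correlation matrix $R$, and $C_N$ is the normal copula. "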
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "qo9zJB2z18zg",
"nbpages": {
"level": 2,
"link": "[3.1.1 Definition ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1.1-Definition)",
"section": "3.1.1 Definition "
}
},
"outputs": [],
"source": [
"## https://pypi.org/project/copulas/#:~:text=Copulas%20is%20a%20Python%20library,following%20the%20same%20statistical%20properties. \n",
"## Defining a correlated trivariate distribution: two Beta variables and one Normal-based variable\n",
"def sample_trivariate_xyz(size=1000, seed=42):\n",
"    \"\"\"Draw a three-column toy dataset whose ``z`` column depends on ``y``.\n",
"\n",
"    Columns of the returned DataFrame:\n",
"\n",
"    * ``x``: samples from a Beta distribution with a=0.1 and b=0.1\n",
"    * ``y``: samples from a Beta distribution with a=0.1 and b=0.5\n",
"    * ``z``: standard normal samples plus 10 times ``y``\n",
"\n",
"    Args:\n",
"        size (int):\n",
"            Number of rows to generate. Defaults to 1000.\n",
"        seed (int):\n",
"            Value handed to ``random_seed``. Defaults to 42.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame:\n",
"            DataFrame with three columns, ``x``, ``y`` and ``z``.\n",
"    \"\"\"\n",
"    # All three draws happen inside the ``random_seed`` context, in the order\n",
"    # x, y, noise. NOTE(review): ``random_seed`` comes from copulas and\n",
"    # presumably seeds NumPy's global RNG for the duration of the block.\n",
"    with random_seed(seed):\n",
"        beta_symmetric = stats.beta.rvs(a=0.1, b=0.1, size=size)\n",
"        beta_skewed = stats.beta.rvs(a=0.1, b=0.5, size=size)\n",
"        gaussian_noise = np.random.normal(size=size)\n",
"\n",
"    frame_columns = {'x': beta_symmetric, 'y': beta_skewed}\n",
"    frame_columns['z'] = gaussian_noise + beta_skewed*10\n",
"    return pd.DataFrame(frame_columns)\n",
" \n",
"# Defining helper functions for 3-dimensional scatter plots (for visualization),\n",
"# followed by a comparison plot\n",
"\n",
"def scatter_3d(data, title=\"Original dataset\", columns=None, fig=None, position=None):\n",
"    \"\"\"Plot 3 dimensional data in a scatter plot.\n",
"\n",
"    Args:\n",
"        data:\n",
"            Table-like object supporting ``data[column]`` and ``data.columns``\n",
"            (e.g. a pandas DataFrame). The selected columns are passed\n",
"            positionally to ``ax.scatter``.\n",
"        title (str):\n",
"            Axes title. No title is set when this is falsy.\n",
"        columns (list):\n",
"            Names of the columns to plot. Defaults to all of ``data.columns``.\n",
"        fig:\n",
"            Matplotlib figure to draw on. A new figure is created when omitted.\n",
"        position (int):\n",
"            Subplot position passed to ``fig.add_subplot``. Defaults to 111.\n",
"\n",
"    Returns:\n",
"        The 3D axes object the points were drawn on.\n",
"    \"\"\"\n",
"    fig = fig or plt.figure()\n",
"    position = position or 111\n",
"\n",
"    ax = fig.add_subplot(position, projection='3d')\n",
"    ax.scatter(*(data[column] for column in columns or data.columns))\n",
"\n",
"    # Keyword names are case-sensitive in recent Matplotlib releases: the\n",
"    # mixed-case ``Fontsize`` is rejected as an unknown property there, so the\n",
"    # lowercase ``fontsize`` is used.\n",
"    ax.set_xlabel('X', fontsize=10)\n",
"    ax.set_ylabel('Y', fontsize=10)\n",
"    ax.set_zlabel('Z', fontsize=10)\n",
"\n",
"    if title:\n",
"        ax.set_title(title, fontsize=20)\n",
"        # Lift the title slightly above the axes.\n",
"        ax.title.set_position([.5, 1.05])\n",
"\n",
"    return ax\n",
"def scatter_3d_2(data, title=\"Syntheic dataset\", columns=None, fig=None, position=None):\n",
"    \"\"\"Plot 3 dimensional data in a scatter plot.\n",
"\n",
"    Counterpart of ``scatter_3d`` with a different default title.\n",
"\n",
"    Args:\n",
"        data:\n",
"            Table-like object supporting ``data[column]`` and ``data.columns``\n",
"            (e.g. a pandas DataFrame). The selected columns are passed\n",
"            positionally to ``ax.scatter``.\n",
"        title (str):\n",
"            Axes title. No title is set when this is falsy.\n",
"        columns (list):\n",
"            Names of the columns to plot. Defaults to all of ``data.columns``.\n",
"        fig:\n",
"            Matplotlib figure to draw on. A new figure is created when omitted.\n",
"        position (int):\n",
"            Subplot position passed to ``fig.add_subplot``. Defaults to 111.\n",
"\n",
"    Returns:\n",
"        The 3D axes object the points were drawn on.\n",
"    \"\"\"\n",
"    # NOTE(review): the default title is misspelled (\"Syntheic\"); it is kept\n",
"    # unchanged here so the function signature stays exactly as callers know it.\n",
"    fig = fig or plt.figure()\n",
"    position = position or 111\n",
"\n",
"    ax = fig.add_subplot(position, projection='3d')\n",
"    ax.scatter(*(data[column] for column in columns or data.columns))\n",
"\n",
"    # Keyword names are case-sensitive in recent Matplotlib releases: the\n",
"    # mixed-case ``Fontsize`` is rejected as an unknown property there, so the\n",
"    # lowercase ``fontsize`` is used.\n",
"    ax.set_xlabel('X', fontsize=10)\n",
"    ax.set_ylabel('Y', fontsize=10)\n",
"    ax.set_zlabel('Z', fontsize=10)\n",
"\n",
"    if title:\n",
"        ax.set_title(title, fontsize=20)\n",
"        # Lift the title slightly above the axes.\n",
"        ax.title.set_position([.5, 1.05])\n",
"\n",
"    return ax\n",
"\n",
"# Defining another trivariate distribution, which outputs age, income and health\n",
"# expectancy\n",
"def sample_trivariate_age_income_health(size=100, seed=42):\n",
"    \"\"\"Sample a three-column toy dataset of age, income and health expectancy.\n",
"\n",
"    ``age`` is drawn from a normal distribution with mean 25 and standard\n",
"    deviation 15. The other two columns are deterministic functions of it:\n",
"\n",
"    * ``income``: ``2.3 * age**2``\n",
"    * ``health_expec``: ``abs(age) + 2 * log(income)``\n",
"\n",
"    Because the normal distribution is unbounded, individual ``age`` values\n",
"    can be negative.\n",
"\n",
"    Args:\n",
"        size (int):\n",
"            Amount of samples to generate. Defaults to 100.\n",
"        seed (int):\n",
"            Random seed to use. Defaults to 42.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame:\n",
"            DataFrame with three columns, ``age``, ``income`` and\n",
"            ``health_expec``.\n",
"    \"\"\"\n",
"    # Only the age draw consumes random numbers, so it is the only statement\n",
"    # that needs to run inside the ``random_seed`` context.\n",
"    with random_seed(seed):\n",
"        age = stats.norm.rvs(25, 15.0, size=size)\n",
"\n",
"    income = 2.3*(age**2)\n",
"    health_expec = abs(age) + 2*np.log(income)\n",
"    return pd.DataFrame({\n",
"        \"age\": age,\n",
"        \"income\": income,\n",
"        \"health_expec\": health_expec\n",
"    })"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"colab_type": "code",
"id": "txAg9VxV2E4P",
"nbpages": {
"level": 2,
"link": "[3.1.1 Definition ](https://ndcbe.github.io/cbe67701-uncertainty-quantification/03.01-Contributed-Example.html#3.1.1-Definition)",
"section": "3.1.1 Definition "
},
"outputId": "75f1532e-bdc2-4440-d157-ed601bc561a2"
},
"outputs": [
{
"data": {
"text/plain": [
"
"
]
}
],
"metadata": {
"colab": {
"name": "03.01-Contributed-Example.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}