{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to mathematical statistics \n",
    "\n",
    "Welcome to lecture 4 in 02403.\n",
    "\n",
    "During the lectures we will present both slides and notebooks. \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: average and variance of a normal sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate k samples of size n from N(mu, sigma2) and compare the simulated\n",
    "# distributions of the sample mean and the scaled sample variance with the\n",
    "# theoretical results  y_bar ~ N(mu, sigma2/n)  and  (n-1)*S2/sigma2 ~ chi2(n-1).\n",
    "n = 5          # observations per sample\n",
    "k = 10000      # number of simulated samples\n",
    "mu = 10        # mean of the sampled normal distribution\n",
    "sigma2 = 2     # variance of the sampled normal distribution\n",
    "seed = 2403    # fixed seed so Restart & Run All reproduces the same figure\n",
    "\n",
    "y = stats.norm.rvs(loc=mu, scale=np.sqrt(sigma2), size=(k, n), random_state=seed)\n",
    "y_bar = y.mean(axis=1)          # one average per simulated sample\n",
    "S2 = y.var(axis=1, ddof=1)      # ddof=1: divide by n-1 (sample variance)\n",
    "\n",
    "fig, ax = plt.subplots(1, 2)\n",
    "ax[0].hist(y_bar, bins=20, density=True)\n",
    "ax[1].hist(S2 * (n - 1) / sigma2, bins=20, density=True)\n",
    "\n",
    "# Theoretical densities are derived from n, mu and sigma2 so they stay\n",
    "# consistent with the simulation when those values are changed.\n",
    "x = np.arange(7, 13, 0.1)\n",
    "ax[0].plot(x, stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2 / n)))\n",
    "ax[0].set(title='Sample mean', xlabel='y_bar', ylabel='density')\n",
    "\n",
    "x = np.arange(0, 20, 0.1)\n",
    "ax[1].plot(x, stats.chi2.pdf(x, df=n - 1))\n",
    "ax[1].set(title='Scaled sample variance', xlabel='(n-1) S2 / sigma2')\n",
    "fig.tight_layout();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## t-quantiles for different df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0.975 quantile of the t distribution for 1..99 degrees of freedom.\n",
    "# A dedicated name is used so the scalar sample size `n` is not overwritten by an array.\n",
    "dof = np.arange(1, 100, 1)\n",
    "## Linear-axis alternative: plt.plot(dof, stats.t.ppf(0.975, dof))\n",
    "plt.semilogx(dof, stats.t.ppf(0.975, dof))\n",
    "plt.xlabel('degrees of freedom')\n",
    "plt.ylabel('0.975 quantile of t');\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Weight of newborn girls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Half-width of the two-sided 95% confidence interval for the mean:\n",
    "#   t_{0.975, n-1} * s / sqrt(n)\n",
    "# The degrees of freedom are derived from the sample size so the two cannot drift apart.\n",
    "n_girls = 50       # sample size\n",
    "s_girls = 467.9    # sample standard deviation\n",
    "ci_half_width = stats.t.ppf(0.975, n_girls - 1) * s_girls / np.sqrt(n_girls)\n",
    "ci_half_width"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Observed t statistic: (sample mean - hypothesised mean) / standard error,\n",
    "# with standard error s / sqrt(n).\n",
    "std_error = 467.9 / np.sqrt(50)\n",
    "t_obs = (3505.7 - 3300) / std_error\n",
    "t_obs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-sided p-value P(T > t_obs) for a t distribution with 49 degrees of freedom.\n",
    "# sf is the survival function 1 - cdf, evaluated without the loss of precision\n",
    "# that the subtraction causes for small tail probabilities.\n",
    "pv = stats.t.sf(t_obs, 49)\n",
    "print(pv)\n",
    "# Two-sided p-value: take |t_obs| so the result is also valid for a negative statistic.\n",
    "print(2 * stats.t.sf(abs(t_obs), 49))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Equal variances?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upper-tail probability P(F > 2) for an F distribution with (9, 9) degrees of freedom.\n",
    "# sf (survival function) equals 1 - cdf but avoids the subtraction's rounding error.\n",
    "stats.f.sf(2, 9, 9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Two independent samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upper-tail probability P(T > 4) for a t distribution with 18 degrees of freedom.\n",
    "# sf (survival function) equals 1 - cdf but stays accurate for small tail probabilities.\n",
    "stats.t.sf(4, 18)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Three independent samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upper-tail probability P(F > 2.5) for an F distribution with (2, 27) degrees of freedom.\n",
    "# sf (survival function) equals 1 - cdf but avoids the subtraction's rounding error.\n",
    "stats.f.sf(2.5, 2, 27)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: 2021 June"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Question 2: P(F <= 2) for an F distribution with (1, 1) degrees of freedom\n",
    "prob_q2 = stats.f.cdf(2, dfn=1, dfd=1)\n",
    "print(\"q 2\", prob_q2)\n",
    "# Question 3: P(T <= -1) for a t distribution with 1 degree of freedom\n",
    "prob_q3 = stats.t.cdf(-1, df=1)\n",
    "print(\"q 3\", prob_q3)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}