{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to mathematical statistics \n",
    "\n",
    "Welcome to the lecture 56 in 02403\n",
    "\n",
    "During the lectures we will present both slides and notebooks. \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Candy factory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 26\n",
    "y_bar = 200.3\n",
    "s = 0.75\n",
    "## CI\n",
    "ME = stats.t.ppf(0.975, df = 25) * s / np.sqrt(n)\n",
    "ci = np.array([y_bar - ME, y_bar + ME])\n",
    "print(stats.t.ppf(0.975, df = 25))\n",
    "print(\"Conf. int \", ci)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CLT in action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "k = 10000\n",
    "\n",
    "n = 1\n",
    "e1 = stats.expon.rvs(loc=0, scale=1, size=(k,n))\n",
    "u1 = stats.uniform.rvs(loc=0,scale=1, size=(k,n))\n",
    "\n",
    "n = 5\n",
    "e2 = stats.expon.rvs(loc=0, scale=1, size=(k,n))\n",
    "u2 = stats.uniform.rvs(loc=0,scale=1, size=(k,n))\n",
    "\n",
    "n = 15\n",
    "e3 = stats.expon.rvs(loc=0, scale=1, size=(k,n))\n",
    "u3 = stats.uniform.rvs(loc=0,scale=1, size=(k,n))\n",
    "\n",
    "n = 30\n",
    "e4 = stats.expon.rvs(loc=0, scale=1, size=(k,n))\n",
    "u4 = stats.uniform.rvs(loc=0,scale=1, size=(k,n))\n",
    "\n",
    "\n",
    "fig, ax =plt.subplots(2,4)\n",
    "ax[0,0].hist(e1.mean(axis=1))\n",
    "ax[1,0].hist(u1.mean(axis=1))\n",
    "ax[0,1].hist(e2.mean(axis=1))\n",
    "ax[1,1].hist(u2.mean(axis=1))\n",
    "ax[0,2].hist(e3.mean(axis=1))\n",
    "ax[1,2].hist(u3.mean(axis=1))\n",
    "ax[0,3].hist(e4.mean(axis=1))\n",
    "ax[1,3].hist(u4.mean(axis=1))\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example Tablets "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Conf. int. of sigma^2  [0.00283389 0.01045302]\n",
      "Conf. int. of sigma  [0.05323433 0.10224001]\n"
     ]
    }
   ],
   "source": [
    "n = 20\n",
    "alpha = 0.05\n",
    "s = 0.07 \n",
    "y_bar = 1.01\n",
    "\n",
    "CI_s2_low = (20-1) * 0.07 ** 2 /stats.chi2.ppf(0.975,df = 20 - 1)\n",
    "CI_s2_upp = (20-1) * 0.07 ** 2 /stats.chi2.ppf(0.025,df = 20 - 1)\n",
    "\n",
    "CI_s2 = np.array([CI_s2_low,CI_s2_upp])\n",
    "CI_s = np.sqrt(CI_s2)\n",
    "\n",
    "\n",
    "print(\"Conf. int. of sigma^2 \", CI_s2)\n",
    "print(\"Conf. int. of sigma \", CI_s)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Security check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 40\n",
    "y_bar = 34.66\n",
    "s = 10.12\n",
    "\n",
    "ME = stats.t.ppf(0.995,df = 40 -1) * s / np.sqrt(40)\n",
    "\n",
    "CI = np.array([y_bar - ME, y_bar + ME])\n",
    "\n",
    "print(\"99% Conf Int \", CI)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "test statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t_obs = (y_bar - 30)/(s/np.sqrt(n))\n",
    "print(t_obs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "p-value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "2 * (1 - stats.t.cdf(np.abs(t_obs), df = 39))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Skive fjord "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SkiveAvg = pd.read_csv(\"../week1/skiveAvg.csv\", sep=';')\n",
    "SkiveAvg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp = SkiveAvg[\"temp\"]\n",
    "month = SkiveAvg[\"month\"]\n",
    "tempJul = temp[month==7]\n",
    "tempJul"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp_test = stats.ttest_1samp(tempJul,popmean= 20)\n",
    "print(temp_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp_test2 = stats.ttest_1samp(tempJul-20,popmean= 0)\n",
    "print(temp_test2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extracting results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(temp_test.confidence_interval(0.95))\n",
    "print(\"p-values\", temp_test.pvalue)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}