{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to mathematical statistics\n",
    "\n",
    "Welcome to lecture 56 in 02403\n",
    "\n",
    "During the lectures we will present both slides and notebooks.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Candy factory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 26         # sample size\n",
    "y_bar = 200.3  # sample mean\n",
    "s = 0.75       # sample standard deviation\n",
    "\n",
    "# 95% confidence interval for the mean:\n",
    "# y_bar +/- t_{0.975}(n - 1) * s / sqrt(n)\n",
    "# The degrees of freedom are derived from n so the two cannot drift apart.\n",
    "t_quantile = stats.t.ppf(0.975, df=n - 1)\n",
    "ME = t_quantile * s / np.sqrt(n)  # margin of error\n",
    "ci = np.array([y_bar - ME, y_bar + ME])\n",
    "print(t_quantile)\n",
    "print(\"Conf. int \", ci)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CLT in action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate k sample means for each sample size and compare their histograms.\n",
    "k = 10000                      # number of simulated samples per sample size\n",
    "sample_sizes = [1, 5, 15, 30]  # number of observations in each sample\n",
    "\n",
    "# Fixed seed so that Restart & Run All reproduces exactly the same figure.\n",
    "rng = np.random.default_rng(2403)\n",
    "\n",
    "fig, ax = plt.subplots(2, len(sample_sizes), figsize=(12, 5))\n",
    "for col, n_obs in enumerate(sample_sizes):\n",
    "    # Each row of the (k, n_obs) array is one sample; its row mean is one simulated average.\n",
    "    e = stats.expon.rvs(loc=0, scale=1, size=(k, n_obs), random_state=rng)\n",
    "    u = stats.uniform.rvs(loc=0, scale=1, size=(k, n_obs), random_state=rng)\n",
    "\n",
    "    # Top row: exponential samples, bottom row: uniform samples.\n",
    "    ax[0, col].hist(e.mean(axis=1))\n",
    "    ax[0, col].set_title(f\"Exponential, n = {n_obs}\")\n",
    "    ax[1, col].hist(u.mean(axis=1))\n",
    "    ax[1, col].set_title(f\"Uniform, n = {n_obs}\")\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Tablets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Conf. int. of sigma^2 [0.00283389 0.01045302]\n",
      "Conf. int. of sigma [0.05323433 0.10224001]\n"
     ]
    }
   ],
   "source": [
    "n = 20        # sample size\n",
    "alpha = 0.05  # the interval has confidence level 1 - alpha\n",
    "s = 0.07      # sample standard deviation\n",
    "y_bar = 1.01  # sample mean (not used by the variance interval below)\n",
    "\n",
    "# (1 - alpha) confidence interval for the variance:\n",
    "# [(n - 1) s^2 / chi2_{1 - alpha/2}(n - 1), (n - 1) s^2 / chi2_{alpha/2}(n - 1)]\n",
    "# Everything is computed from n, s and alpha instead of repeated literals.\n",
    "dof = n - 1\n",
    "CI_s2_low = dof * s ** 2 / stats.chi2.ppf(1 - alpha / 2, df=dof)\n",
    "CI_s2_upp = dof * s ** 2 / stats.chi2.ppf(alpha / 2, df=dof)\n",
    "\n",
    "CI_s2 = np.array([CI_s2_low, CI_s2_upp])\n",
    "# The square roots of the end points give the interval for sigma.\n",
    "CI_s = np.sqrt(CI_s2)\n",
    "\n",
    "print(\"Conf. int. of sigma^2 \", CI_s2)\n",
    "print(\"Conf. int. of sigma \", CI_s)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: Security check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 40         # sample size\n",
    "y_bar = 34.66  # sample mean\n",
    "s = 10.12      # sample standard deviation\n",
    "\n",
    "# 99% confidence interval for the mean, hence the 0.995 quantile of t(n - 1).\n",
    "ME = stats.t.ppf(0.995, df=n - 1) * s / np.sqrt(n)\n",
    "\n",
    "CI = np.array([y_bar - ME, y_bar + ME])\n",
    "\n",
    "print(\"99% Conf Int \", CI)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Test statistic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Observed t statistic for the hypothesized mean mu_0.\n",
    "# Uses n, y_bar and s from the security check cell directly above.\n",
    "mu_0 = 30\n",
    "t_obs = (y_bar - mu_0) / (s / np.sqrt(n))\n",
    "print(t_obs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "p-value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two-sided p-value. The survival function sf(x) = 1 - cdf(x) is evaluated directly,\n",
    "# which avoids the loss of precision in 1 - cdf(x) when the tail probability is tiny.\n",
    "2 * stats.t.sf(np.abs(t_obs), df=n - 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Skive fjord"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The file is semicolon-separated; the path is relative to the working directory.\n",
    "SkiveAvg = pd.read_csv(\"../week1/skiveAvg.csv\", sep=';')\n",
    "SkiveAvg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp = SkiveAvg[\"temp\"]\n",
    "month = SkiveAvg[\"month\"]\n",
    "# Keep only the temperatures of the rows where month equals 7.\n",
    "tempJul = temp[month == 7]\n",
    "tempJul"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-sample t-test of the selected temperatures against a mean of 20.\n",
    "temp_test = stats.ttest_1samp(tempJul, popmean=20)\n",
    "print(temp_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same test after shifting the data: testing (tempJul - 20) against a mean of 0.\n",
    "temp_test2 = stats.ttest_1samp(tempJul - 20, popmean=0)\n",
    "print(temp_test2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extracting results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The result object exposes the confidence interval and the p-value of the test.\n",
    "print(temp_test.confidence_interval(0.95))\n",
    "print(\"p-values\", temp_test.pvalue)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}