setwd("~/Kurser/introstat/git/users/jkmo/2024June/lectures/lect01") rm(list=ls()) ## Adding numbers in the console 2+3 y <- 3 x <- c(1, 4, 6, 2) x x <- 1:10 x ## Data collected in a statistics class x <- c(22 , 1.5, 14, 12, 12, 15, 8) # Distance to DTU (km) y <- c(35 , 10, 57, 17, 20, 60, 15) # Distance to DTU (min) ######################################## ## sort(x) mean(x); median(x) (n <- length(x)) (p <- 0.5) n * p sort(x)[4] ?mean sum(x)/length(x) ## Sample variance and standard deviation var(x) sqrt(var(x)) sd(x) cov(x,y) cor(x,y) ## Sample quartiles quantile(x, type = 2) quantile(x, probs = c(0.2, 0.8), type = 2) ## Sample quantiles 0%, 10%,..,90%, 100%: quantile(x, probs=seq(0, 1, by = 0.10), type = 2) ## A histogram of the distance: hist(x) ## A density histogram of the distance: hist(x, freq=FALSE, col="red", nclass=20) plot(ecdf(x), verticals=TRUE) ## A basic boxplot of the distance: (range=0 makes it "basic") boxplot(x, range=0, col="red", main="Basic boxplot") text(1.3, quantile(x), c("Minimum","Q1","Median","Q3","Maximum"), col="blue") ## A modified boxplot of the distance with an ## extreme observation, 150km added: ## The modified version is the default boxplot(c(x,150), col="red", main="Modified boxplot") text(1.3, quantile(c(x,150)), c("Minimum","Q1","Median","Q3", "Maximum"),col="blue") ################# plot(x,y) cor(x,y) ########################################################## ## Skive fjord data skiveAvg <- read.table("skiveAvg.csv", sep = ";", header = TRUE) head(skiveAvg) dim(skiveAvg) summary(skiveAvg[ ,1:4]) hist(skiveAvg$chla) hist(log(skiveAvg$chla)) ############################################# # Terning set.seed(134) sample(1:6,1) ## Flere gange k <- 10 x<-sample(1:6,k,replace=TRUE) x ## Hvor mange 6'er? x sum(x==6) ## Simulation k <- 10000 ## Try with more x <- sample(1:6,k,replace=TRUE) mean(x) var(x) m <- cumsum(x)/(1:k) plot(m,type="l") plot(m[-c(1:200)],type="l") ################################################## # Slides set.seed(1234) n <- 200 x <- runif(n) y1 <- x + rnorm(n,sd=0.1) y2 <- -x + rnorm(n,sd=0.5) y3 <- rnorm(n) y4 <- sin(pi*x) + rnorm(n,sd=0.1) ## cor(cbind(x,y1,y2,y3,y4)) par(mfrow=c(2,2),cex.main=2,cex.lab=2, cex.axis=1.5) par(mar = c(5, 5.5, 1.5, 0.5)) plot(x,y1,pch=19,cex=0.5,ylab="y", main=expression(r%~~%0.95)) plot(x,y2,pch=19,cex=0.5,ylab="y", main=expression(r%~~%-0.5)) plot(x,y3,pch=19,cex=0.5,ylab="y", main=expression(r%~~%0)) plot(x,y4,pch=19,cex=0.5,ylab="y", main=expression(r%~~%0)) ################################################## par(mfrow=c(1,1),cex.main=2,cex.lab=2,mar=c(4.5,4.5,2,2)) ## Data from Skive fjord skiveAvg <- read.table("skiveAvg.csv", sep = ";", header = TRUE) head(skiveAvg) y <- log(skiveAvg$chla) hist(y) ################################################## # ################################################## par(cex.main=2,cex.lab=2,mar=c(4.5,4.5,2,2)) plot(ecdf(y)) ################################################## # ################################################## par(cex.main=2,cex.lab=2,cex.axis=1.5,mar=c(4.5,4.5,2,2)) plot(ecdf(y),axes=FALSE,xlab="y") box() axis(1,at=quantile(y,probs=c(0,0.25,0.5,0.75,1)), labels=c("min",expression(Q[1]),expression(Q[2]),expression(Q[3]),"max")) axis(2,c(0,0.25,0.5,0.75,1)) lines(quantile(y,probs=0.25)*c(1,1), c(0,0.25),col="blue") lines(c(min(y)-5,quantile(y,probs=0.25)), c(0.25,0.25),col="blue") lines(quantile(y,probs=0.75)*c(1,1), c(0,0.75),col="blue") lines(c(min(y)-5,quantile(y,probs=0.75)), c(0.75,0.75),col="blue") lines(quantile(y,probs=0.5)*c(1,1), c(0,0.5),col="blue",lwd=4) lines(c(min(y)-5,quantile(y,probs=0.5)), c(0.5,0.5),col="blue",lwd=4) ################################################## ################################################## # ################################################## par(mfrow=c(1,2),cex.main=2,cex.lab=2,cex.axis=1.5) boxplot(y) boxplot(exp(y)) ################################################## # ################################################## par(mfrow=c(1,1),cex.main=2,cex.lab=2,cex.axis=1.5,mar=c(4.5,4.5,2,2)) plot(ecdf(y),axes=FALSE,xlab="y") box() axis(1,at=sort(c(quantile(y,probs=c(0.025,0.975)),mean(y))), labels=c(expression(tau[0.025]),expression(bar(y)),expression(tau[0.975]))) axis(2,c(0.025,0.5,0.975)) lines(quantile(y,probs=0.025)*c(1,1), c(0,0.025),col="blue") lines(c(min(y)-5,quantile(y,probs=0.025)), c(0.025,0.025),col="blue") lines(quantile(y,probs=0.975)*c(1,1), c(0,0.975),col="blue") lines(c(min(y)-5,quantile(y,probs=0.975)), c(0.975,0.975),col="blue") lines(quantile(y,probs=0.5)*c(1,1), c(0,0.5),col="blue",lwd=4) lines(c(min(y)-5,quantile(y,probs=0.5)), c(0.5,0.5),col="blue",lwd=4) points(mean(y),-0.025,pch=19,cex=2,col=2) lines(mean(y)+2*sd(y)*c(-1,1),-c(1,1)*0.025,col=2) legend(min(y),0.95,legend=c(expression(bar(y)), expression(paste(bar(y),"+/-",2*sd(x)))), pch=c(19,0),lty=c(0,1),pt.cex=c(1,0),col=2,cex=1.5) ################################################## # ################################################## par(cex.main=2,cex.lab=2,cex.axis=1.5,mar=c(4.5,4.5,2,2)) plot(log(chla) ~ temp, data = skiveAvg) ################################################## # ################################################## par(cex.main=2,cex.lab=2,cex.axis=1.5,mar=c(4.5,4.5,2,2)) r <- round(cor(log(skiveAvg$chla), skiveAvg$temp),digits=2) plot(log(chla) ~ temp, data = skiveAvg, main = paste("r = ", r)) lines(mean(skiveAvg$temp)*c(1,1),range(log(skiveAvg$chla)),lty=2,col="blue") lines(range(skiveAvg$temp),mean(log(skiveAvg$chla))*c(1,1),lty=2,col="blue") cex=1.5 legend(-2,-2,legend=c(expression(x[i]bar(y)), expression(paste("(",x[i]-bar(x),")", "(",y[i]-bar(y),")<0"))), box.col="white",text.col="black",bg="white",cex=cex) legend(-2,-6.5,legend=c(expression(x[i]0"))), box.col="white",text.col="black",bg="white",cex=cex) legend(15,-6.5,legend=c(expression(x[i]>bar(x)),expression(y[i]bar(x)),expression(y[i]>bar(y)), expression(paste("(",x[i]-bar(x),")", "(",y[i]-bar(y),")>0"))), box.col="white",text.col="black",bg="white",cex=cex) ################################################## # ##################################################