library(boot)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
#Hypothesis cowpea (Vigna unguiculata) fertilized with higher rates of Nitrogen will be taller when fully mature.
# lowN: average height = 20 cm, sd 2 based on cultivar
# medN: average height = 30 cm, sd 3 based on cultivar
# highN: average height = 40 cm, sd 2 based on cultivar
# there is more variation at the middle rate because of natural cultivar variation and possible nutrient leaching
# there are 5 reps of 6 different cultivars in each treatment groups
lowN <- rnorm(n=30, mean=20, sd=2)
medN <- rnorm(n=30, mean=30, sd=3)
highN <- rnorm(n=30, mean=40, sd=2)
# there is more variation at the middle rate because of natural variation and possible nutrient leaching
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
group_by(lowN,medN,highN)
## # A tibble: 30 × 3
## # Groups: lowN, medN, highN [30]
## lowN medN highN
## <dbl> <dbl> <dbl>
## 1 21.1 30.7 39.8
## 2 22.9 28.7 40.9
## 3 19.7 31.6 39.4
## 4 17.4 36.5 41.4
## 5 18.7 32.8 38.1
## 6 19.6 22.8 40.0
## 7 20.5 29.8 39.3
## 8 22.2 34.9 40.0
## 9 20.0 36.6 40.5
## 10 19.7 24.4 39.1
## # … with 20 more rows
data <- plantdata %>%
pivot_longer(cols=lowN:highN,
names_to = "trtmnt",
values_to = "height",
values_drop_na = T)
library(tidyverse)
summary(aov(height~trtmnt, data=data))
## Df Sum Sq Mean Sq F value Pr(>F)
## trtmnt 2 5593 2796.5 484 <2e-16 ***
## Residuals 87 503 5.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(data, aes(x=trtmnt, y=height)) +
geom_point()
lowN1 <- rnorm(n=30, mean=20, sd=2)
medN1 <- rnorm(n=30, mean=30, sd=3)
highN1 <- rnorm(n=30, mean=40, sd=2)
# there is more variation at the middle rate because of natural variation and possible nutrient leaching
plantdata1 <- data.frame(lowN1,medN1,highN1)
plantdata1 %>%
group_by(lowN1,medN1,highN1)
## # A tibble: 30 × 3
## # Groups: lowN1, medN1, highN1 [30]
## lowN1 medN1 highN1
## <dbl> <dbl> <dbl>
## 1 20.6 26.2 41.9
## 2 17.9 32.4 37.2
## 3 24.2 30.9 39.4
## 4 21.0 28.8 43.6
## 5 19.8 32.0 40.3
## 6 19.2 24.8 41.0
## 7 17.1 30.7 40.0
## 8 21.0 30.5 38.9
## 9 24.2 25.4 43.1
## 10 20.1 36.0 39.3
## # … with 20 more rows
data1 <- plantdata1 %>%
pivot_longer(cols=lowN1:highN1,
names_to = "trtmnt",
values_to = "height",
values_drop_na = T)
summary(aov(height~trtmnt, data=data1))
## Df Sum Sq Mean Sq F value Pr(>F)
## trtmnt 2 5980 2989.9 507.7 <2e-16 ***
## Residuals 87 512 5.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lowN <- rnorm(n=30, mean=20, sd=2)
medN <- rnorm(n=30, mean=21, sd=3)
highN <- rnorm(n=30, mean=22, sd=2)
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
group_by(lowN,medN,highN)
## # A tibble: 30 × 3
## # Groups: lowN, medN, highN [30]
## lowN medN highN
## <dbl> <dbl> <dbl>
## 1 21.1 22.1 21.2
## 2 18.4 23.0 22.7
## 3 22.0 24.8 24.1
## 4 17.8 23.0 19.8
## 5 20.8 20.4 23.6
## 6 19.0 20.0 19.2
## 7 18.0 14.2 18.7
## 8 18.3 19.5 22.2
## 9 19.2 17.3 23.5
## 10 20.4 22.1 21.1
## # … with 20 more rows
data <- plantdata %>%
pivot_longer(cols=lowN:highN,
names_to = "trtmnt",
values_to = "height",
values_drop_na = T)
summary(aov(height~trtmnt, data=data))
## Df Sum Sq Mean Sq F value Pr(>F)
## trtmnt 2 44.5 22.271 5.434 0.00597 **
## Residuals 87 356.6 4.098
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# to get a p-value of >0.05 i had to put all of the means between 20-22, and that resulted in a p value of 0.076, making it in significant.
lowN <- rnorm(n=2, mean=20, sd=2)
medN <- rnorm(n=2, mean=30, sd=3)
highN <- rnorm(n=2, mean=40, sd=2)
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
group_by(lowN,medN,highN)
## # A tibble: 2 × 3
## # Groups: lowN, medN, highN [2]
## lowN medN highN
## <dbl> <dbl> <dbl>
## 1 18.7 31.8 40.6
## 2 17.6 26.3 42.6
data <- plantdata %>%
pivot_longer(cols=lowN:highN,
names_to = "trtmnt",
values_to = "height",
values_drop_na = T)
summary(aov(height~trtmnt, data=data))
## Df Sum Sq Mean Sq F value Pr(>F)
## trtmnt 2 549.9 274.97 46.3 0.00556 **
## Residuals 3 17.8 5.94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I had to lower it to 1 observation per treatment in order to get an insignificant p-value. Even at 2 observations per treatment, the p value remains <0.05 at 0.0035, because the mean values remaining the same shows that they are different from one another and it is statistically significant.