Homework 8

library(boot)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggplot2)

#Hypothesis cowpea (Vigna unguiculata) fertilized with higher rates of Nitrogen will be taller when fully mature.

# lowN: average height = 20 cm, sd 2 based on cultivar
# medN: average height = 30 cm, sd 3 based on cultivar 
# highN: average height = 40 cm, sd 2 based on cultivar
# there is more variation at the middle rate because of natural cultivar variation and possible nutrient leaching
# there are 5 reps of 6 different cultivars in each treatment groups

lowN <- rnorm(n=30, mean=20, sd=2)
medN <- rnorm(n=30, mean=30, sd=3)
highN <- rnorm(n=30, mean=40, sd=2)
# there is more variation at the middle rate because of natural variation and possible nutrient leaching
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
  group_by(lowN,medN,highN)

## # A tibble: 30 × 3
## # Groups:   lowN, medN, highN [30]
##     lowN  medN highN
##    <dbl> <dbl> <dbl>
##  1  21.1  30.7  39.8
##  2  22.9  28.7  40.9
##  3  19.7  31.6  39.4
##  4  17.4  36.5  41.4
##  5  18.7  32.8  38.1
##  6  19.6  22.8  40.0
##  7  20.5  29.8  39.3
##  8  22.2  34.9  40.0
##  9  20.0  36.6  40.5
## 10  19.7  24.4  39.1
## # … with 20 more rows

data <- plantdata %>%
  pivot_longer(cols=lowN:highN,
              names_to = "trtmnt",
              values_to = "height",
              values_drop_na = T)

library(tidyverse)
summary(aov(height~trtmnt, data=data))

##             Df Sum Sq Mean Sq F value Pr(>F)    
## trtmnt       2   5593  2796.5     484 <2e-16 ***
## Residuals   87    503     5.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ggplot(data, aes(x=trtmnt, y=height)) +
  geom_point()

lowN1 <- rnorm(n=30, mean=20, sd=2)
medN1 <- rnorm(n=30, mean=30, sd=3)
highN1 <- rnorm(n=30, mean=40, sd=2)
# there is more variation at the middle rate because of natural variation and possible nutrient leaching
plantdata1 <- data.frame(lowN1,medN1,highN1)
plantdata1 %>%
  group_by(lowN1,medN1,highN1)

## # A tibble: 30 × 3
## # Groups:   lowN1, medN1, highN1 [30]
##    lowN1 medN1 highN1
##    <dbl> <dbl>  <dbl>
##  1  20.6  26.2   41.9
##  2  17.9  32.4   37.2
##  3  24.2  30.9   39.4
##  4  21.0  28.8   43.6
##  5  19.8  32.0   40.3
##  6  19.2  24.8   41.0
##  7  17.1  30.7   40.0
##  8  21.0  30.5   38.9
##  9  24.2  25.4   43.1
## 10  20.1  36.0   39.3
## # … with 20 more rows

data1 <- plantdata1 %>%
  pivot_longer(cols=lowN1:highN1,
              names_to = "trtmnt",
              values_to = "height",
              values_drop_na = T)
summary(aov(height~trtmnt, data=data1))

##             Df Sum Sq Mean Sq F value Pr(>F)    
## trtmnt       2   5980  2989.9   507.7 <2e-16 ***
## Residuals   87    512     5.9                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lowN <- rnorm(n=30, mean=20, sd=2)
medN <- rnorm(n=30, mean=21, sd=3)
highN <- rnorm(n=30, mean=22, sd=2)
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
  group_by(lowN,medN,highN)

## # A tibble: 30 × 3
## # Groups:   lowN, medN, highN [30]
##     lowN  medN highN
##    <dbl> <dbl> <dbl>
##  1  21.1  22.1  21.2
##  2  18.4  23.0  22.7
##  3  22.0  24.8  24.1
##  4  17.8  23.0  19.8
##  5  20.8  20.4  23.6
##  6  19.0  20.0  19.2
##  7  18.0  14.2  18.7
##  8  18.3  19.5  22.2
##  9  19.2  17.3  23.5
## 10  20.4  22.1  21.1
## # … with 20 more rows

data <- plantdata %>%
  pivot_longer(cols=lowN:highN,
              names_to = "trtmnt",
              values_to = "height",
              values_drop_na = T)

summary(aov(height~trtmnt, data=data))

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## trtmnt       2   44.5  22.271   5.434 0.00597 **
## Residuals   87  356.6   4.098                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# to get a p-value of >0.05 i had to put all of the means between 20-22, and that resulted in a p  value of 0.076, making it in significant.

lowN <- rnorm(n=2, mean=20, sd=2)
medN <- rnorm(n=2, mean=30, sd=3)
highN <- rnorm(n=2, mean=40, sd=2)
plantdata <- data.frame(lowN,medN,highN)
plantdata %>%
  group_by(lowN,medN,highN)

## # A tibble: 2 × 3
## # Groups:   lowN, medN, highN [2]
##    lowN  medN highN
##   <dbl> <dbl> <dbl>
## 1  18.7  31.8  40.6
## 2  17.6  26.3  42.6

data <- plantdata %>%
  pivot_longer(cols=lowN:highN,
              names_to = "trtmnt",
              values_to = "height",
              values_drop_na = T)
              
summary(aov(height~trtmnt, data=data))

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## trtmnt       2  549.9  274.97    46.3 0.00556 **
## Residuals    3   17.8    5.94                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# I had to lower it to 1 observation per treatment in order to get an insignificant p-value. Even at 2 observations per treatment, the p value remains <0.05 at 0.0035, because the mean values remaining the same shows that they are different from one another and it is statistically significant.

Homework 8

Jasmine Hart

3/16/2022