Based on Chapter 7 of ModernDive. Code for Quiz 11.
- Make sure you have installed and loaded the tidyverse and the moderndive packages
- Fill in the blanks
- Put the command you use in the R chunks in your Rmd file for this quiz.
- Modify the code for comparing different sample sizes from the virtual bowl
- Segment 1: sample size = SEE QUIZ
# Take 1200 replicate samples, each of size 30, from `bowl`.
virtual_samples_30 <- rep_sample_n(bowl, size = 30, reps = 1200)
1.b) Compute resulting 1200 replicates of proportion red
# Instructions:
# - start with virtual_samples_30 THEN
# - group_by replicate THEN
# - create variable red equal to the sum of all the red balls
# - create variable prop_red equal to variable red / 30
# - assign the output to virtual_prop_red_30
#
# summarize() (rather than mutate()) collapses each replicate to a single
# row, so the result holds exactly one proportion per replicate. mutate()
# would keep all 30 rows of every replicate, each carrying the same prop_red,
# which repeats every replicate 30 times in the histogram and makes any
# per-replicate standard deviation equal to 0.
virtual_prop_red_30 <- virtual_samples_30 %>%
  group_by(replicate) %>%
  summarize(red = sum(color == "red")) %>%
  mutate(prop_red = red / 30)
1.c) Plot distribution of virtual_prop_red_30 via a histogram
# Instructions: use labs to
# - label x axis = "Proportion of 30 balls that were red"
# - create title = "30"
#
# Histogram of prop_red from virtual_prop_red_30.
ggplot(data = virtual_prop_red_30, mapping = aes(x = prop_red)) +
  geom_histogram(color = "white", binwidth = 0.05, boundary = 0.4) +
  labs(title = "30", x = "Proportion of 30 balls that were red")

# No plot is passed to ggsave(), so it writes the most recently displayed
# plot (the histogram above) to preview.png in the post's directory.
ggsave(
  filename = "preview.png",
  path = here::here("_posts", "2021-05-03-sampling")
)
# Take 1200 replicate samples, each of size 55, from `bowl`.
virtual_samples_55 <- rep_sample_n(bowl, size = 55, reps = 1200)
2.b) Compute resulting 1200 replicates of proportion red
# Instructions:
# - start with virtual_samples_55 THEN
# - group_by replicate THEN
# - create variable red equal to the sum of all the red balls
# - create variable prop_red equal to variable red / 55
# - assign the output to virtual_prop_red_55
#
# summarize() (rather than mutate()) collapses each replicate to a single
# row, so the result holds exactly one proportion per replicate. mutate()
# would keep all 55 rows of every replicate, each carrying the same prop_red,
# which repeats every replicate 55 times in the histogram and makes any
# per-replicate standard deviation equal to 0.
virtual_prop_red_55 <- virtual_samples_55 %>%
  group_by(replicate) %>%
  summarize(red = sum(color == "red")) %>%
  mutate(prop_red = red / 55)
2.c) Plot distribution of virtual_prop_red_55 via a histogram
# Instructions: use labs to
# - label x axis = "Proportion of 55 balls that were red"
# - create title = "55"
#
# Histogram of prop_red from virtual_prop_red_55.
ggplot(data = virtual_prop_red_55, mapping = aes(x = prop_red)) +
  geom_histogram(color = "white", binwidth = 0.05, boundary = 0.4) +
  labs(title = "55", x = "Proportion of 55 balls that were red")
# Take 1200 replicate samples, each of size 120, from `bowl`.
virtual_samples_120 <- rep_sample_n(bowl, size = 120, reps = 1200)
3.b) Compute resulting 1200 replicates of proportion red
# Instructions:
# - start with virtual_samples_120 THEN
# - group_by replicate THEN
# - create variable red equal to the sum of all the red balls
# - create variable prop_red equal to variable red / 120
# - assign the output to virtual_prop_red_120
#
# summarize() (rather than mutate()) collapses each replicate to a single
# row, so the result holds exactly one proportion per replicate. mutate()
# would keep all 120 rows of every replicate, each carrying the same prop_red,
# which repeats every replicate 120 times in the histogram and makes any
# per-replicate standard deviation equal to 0.
virtual_prop_red_120 <- virtual_samples_120 %>%
  group_by(replicate) %>%
  summarize(red = sum(color == "red")) %>%
  mutate(prop_red = red / 120)
3.c) Plot distribution of virtual_prop_red_120 via a histogram
# Instructions: use labs to
# - label x axis = "Proportion of 120 balls that were red"
# - create title = "120"
#
# Histogram of prop_red from virtual_prop_red_120.
ggplot(data = virtual_prop_red_120, mapping = aes(x = prop_red)) +
  geom_histogram(color = "white", binwidth = 0.05, boundary = 0.4) +
  labs(title = "120", x = "Proportion of 120 balls that were red")
# Standard deviation of prop_red across the replicates of size 30.
# ungroup() + distinct() leave exactly one prop_red value per replicate, so
# sd() measures the spread between replicates and the result is a single
# row. Taking sd() inside each replicate group instead returns 0 for every
# replicate, because prop_red is constant within a replicate.
virtual_prop_red_30 %>%
  ungroup() %>%
  distinct(replicate, prop_red) %>%
  summarize(sd = sd(prop_red))
# A tibble: 1,200 x 2
replicate sd
* <int> <dbl>
1 1 0
2 2 0
3 3 0
4 4 0
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
10 10 0
# … with 1,190 more rows
# Standard deviation of prop_red across the replicates of size 55.
# ungroup() + distinct() leave exactly one prop_red value per replicate, so
# sd() measures the spread between replicates and the result is a single
# row. Taking sd() inside each replicate group instead returns 0 for every
# replicate, because prop_red is constant within a replicate.
virtual_prop_red_55 %>%
  ungroup() %>%
  distinct(replicate, prop_red) %>%
  summarize(sd = sd(prop_red))
# A tibble: 1,200 x 2
replicate sd
* <int> <dbl>
1 1 0
2 2 0
3 3 0
4 4 0
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
10 10 0
# … with 1,190 more rows
# Standard deviation of prop_red across the replicates of size 120.
# ungroup() + distinct() leave exactly one prop_red value per replicate, so
# sd() measures the spread between replicates and the result is a single
# row. Taking sd() inside each replicate group instead returns 0 for every
# replicate, because prop_red is constant within a replicate.
virtual_prop_red_120 %>%
  ungroup() %>%
  distinct(replicate, prop_red) %>%
  summarize(sd = sd(prop_red))
# A tibble: 1,200 x 2
replicate sd
* <int> <dbl>
1 1 0
2 2 0
3 3 0
4 4 0
5 5 0
6 6 0
7 7 0
8 8 0
9 9 0
10 10 0
# … with 1,190 more rows
The distribution with sample size, n = 120, has the smallest standard deviation (spread) around the estimated proportion of red balls.