Using a for loop, write a function to calculate the number of zeroes in a numeric vector
# Count the zeroes in a numeric vector with an explicit for loop.
#
# x: numeric vector to scan.
# Returns the number of elements of x equal to 0 (0 for an empty vector).
count_zeroes <- function(x) {
  n_zero <- 0
  # seq_along() yields an empty sequence for an empty vector, whereas
  # 1:length(x) would iterate over c(1, 0) and index out of range.
  for (i in seq_along(x)) {
    if (x[i] == 0) {
      n_zero <- n_zero + 1
    }
  }
  n_zero
}

# 45 Bernoulli draws with success probability 0.9, so zeroes are the rare case.
myvector <- rbinom(n = 45, size = 1, prob = 0.9)
counter <- count_zeroes(myvector)
print(counter)
## [1] 6
Use subsetting instead of a loop to rewrite the function as a single line of code.
# Vectorised one-liner: the comparison yields a logical vector and sum() counts
# its TRUE values. na.rm = TRUE keeps missing values from being counted as
# zeroes (logical subsetting with NA would retain them as NA elements).
sum(myvector == 0, na.rm = TRUE)
## [1] 6
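Write a function that takes as input two integers representing the number of rows and columns in a matrix. The function should return a matrix of those dimensions in which each element is the product of its row number and its column number.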
# Build and print a multiplication-table matrix.
#
# x: number of rows.
# y: number of columns.
# Prints the x-by-y matrix whose [i, j] element is i * j and returns it
# invisibly. A zero-sized dimension gives an empty matrix instead of the
# "subscript out of bounds" error that looping over 1:nrow(mat) produced.
makemat <- function(x, y) {
  mat <- matrix(NA_integer_, nrow = x, ncol = y)
  # row() and col() return integer matrices of row / column indices with the
  # same shape as mat, so their elementwise product fills every cell at once.
  mat <- row(mat) * col(mat)
  print(mat)
  invisible(mat)
}
# Demonstrate makemat() with a random row count and a random column count,
# each drawn uniformly from 1 to 10.
makemat(
  x = sample(10, size = 1),
  y = sample(10, size = 1)
)
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 2 4 6 8
## [3,] 3 6 9 12
## [4,] 4 8 12 16
## [5,] 5 10 15 20
## [6,] 6 12 18 24
## [7,] 7 14 21 28
## [8,] 8 16 24 32
## [9,] 9 18 27 36
## [10,] 10 20 30 40
PART A: Simulate a dataset with 3 groups of data, each group drawn from a distribution with a different mean. The final data frame should have 1 column for group and 1 column for the response variable.
# Draw ten normal deviates (sd = 1) per group; the groups differ only in mean.
Group1 <- rnorm(n = 10, mean = 5)
Group2 <- rnorm(n = 10, mean = 2)
Group3 <- rnorm(n = 10, mean = 8)

# Stack the responses and build a matching label vector, ten labels per group.
ResVar <- c(Group1, Group2, Group3)
Groups <- rep(c("Group1", "Group2", "Group3"), each = 10)

# Long-format data frame: one row per observation.
Group.df <- data.frame(Groups = Groups, ResVar = ResVar)
head(Group.df, n = 6)
## Groups ResVar
## 1 Group1 6.318989
## 2 Group1 4.698730
## 3 Group1 3.166804
## 4 Group1 6.490198
## 5 Group1 5.515674
## 6 Group1 5.477660
PART B: Write a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reshuffle the response variable and compute the mean of each group.
#
# data:         data frame holding one grouping column and one response
#               column; defaults to the global Group.df so that a bare
#               shuffle_data() call keeps working.
# group_col:    name of the grouping column.
# response_col: name of the numeric response column.
# Returns an unnamed numeric vector with one mean per group, ordered by the
# sorted group labels.
shuffle_data <- function(data = Group.df,
                         group_col = "Groups",
                         response_col = "ResVar") {
  response <- data[[response_col]]
  # Permute by index: sample(x) on a length-one numeric x would instead draw
  # from 1:x, so sample.int() on the length is the safe form.
  shuffled <- response[sample.int(length(response))]
  # Group labels come from `data` itself rather than a separate global vector,
  # so labels and responses cannot drift out of sync.
  group_means <- tapply(shuffled, data[[group_col]], mean)
  # tapply() returns a named 1-d array; strip it down to a plain vector.
  as.vector(group_means)
}
# One reshuffle: displays the three group means of the permuted response.
print(shuffle_data())
## [1] 3.873219 6.489461 5.021049
PART C: Use a for loop to repeat the function from Part B 100 times. Store the results in a data frame that has 1 column indicating the replicate number and 1 column for each new group mean, for a total of 4 columns.
# want to create an empty data frame.
# use i to append onto data frame at [i] row
# Number of reshuffled replicates; every size below derives from this value,
# so changing it cannot leave stray all-NA rows in the result.
n_sim <- 100

# Preallocate a numeric matrix with one row per replicate and one column per
# group mean. Local storage is used so the raw Group1/Group2/Group3 vectors
# from Part A are not overwritten.
shuffled_means <- matrix(
  NA_real_,
  nrow = n_sim,
  ncol = 3,
  dimnames = list(NULL, c("Group1", "Group2", "Group3"))
)

for (i in seq_len(n_sim)) {
  # Each call reshuffles the response and returns the three group means.
  shuffled_means[i, ] <- shuffle_data()
}

# Replicate is stored as double so the column type matches the group means.
Randomization <- data.frame(
  Replicate = as.numeric(seq_len(n_sim)),
  shuffled_means
)
head(Randomization)
## Replicate Group1 Group2 Group3
## 1 1 5.343009 4.553961 5.486759
## 2 2 5.467254 5.078507 4.837968
## 3 3 4.882255 5.844317 4.657157
## 4 4 6.084798 4.673599 4.625332
## 5 5 5.124287 5.772177 4.487266
## 6 6 5.531774 4.437212 5.414743
PART D: Use qplot() to create a histogram of the means for each reshuffled group. How do the distributions of reshuffled means compare to the original means?
library(ggplot2)
# Histograms of the reshuffled means, one plot per group.
# The fill and outline colours are constants, so they are passed to
# geom_histogram() as fixed parameters; aes() is reserved for mappings from
# data columns.
g1 <- ggplot(data = Randomization, mapping = aes(x = Group1)) +
  geom_histogram(fill = "tomato", color = "black")
print(g1)

g2 <- ggplot(data = Randomization, mapping = aes(x = Group2)) +
  geom_histogram(fill = "tomato", color = "black")
print(g2)

g3 <- ggplot(data = Randomization, mapping = aes(x = Group3)) +
  geom_histogram(fill = "tomato", color = "black")
print(g3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Standard Statistical Analysis of Group Means:
# One-way ANOVA of the response on group for the original (unshuffled) data.
ano_model <- aov(ResVar ~ Groups, data = Group.df)
z <- summary(ano_model)

# Flatten the summary table into a named numeric vector; the entries for the
# Groups term carry the suffix "1".
flat_out <- unlist(z)

# `=` creates named list elements. `<-` inside list() would instead assign
# global variables and leave the list elements unnamed.
ano_stats <- list(
  f_ratio = flat_out["F value1"],
  f_pval = flat_out["Pr(>F)1"]
)
print(ano_stats)
## [[1]]
## F value1
## 45.01726
##
## [[2]]
## Pr(>F)1
## 2.518676e-09
p-values from randomization test
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.6 ✓ purrr 0.3.4
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Reshape the replicate table from wide to long: the three group-mean columns
# collapse into a label column ("Groups") and a value column ("ResVar").
Rand_Anova <- pivot_longer(
  data = Randomization,
  cols = c(Group1, Group2, Group3),
  names_to = "Groups",
  values_to = "ResVar"
)
head(Rand_Anova)
## # A tibble: 6 × 3
## Replicate Groups ResVar
## <dbl> <chr> <dbl>
## 1 1 Group1 5.34
## 2 1 Group2 4.55
## 3 1 Group3 5.49
## 4 2 Group1 5.47
## 5 2 Group2 5.08
## 6 2 Group3 4.84
# One-way ANOVA of the reshuffled group means on group.
Rand_ano_model <- aov(ResVar ~ Groups, data = Rand_Anova)

# Summarise the randomization model itself. Summarising the model fitted to
# the original data would simply reproduce the original F ratio and p-value.
z2 <- summary(Rand_ano_model)

# Flatten the summary table into a named numeric vector; the entries for the
# Groups term carry the suffix "1".
flat_out2 <- unlist(z2)

# `=` creates named list elements. `<-` inside list() would instead assign
# global variables and leave the list elements unnamed.
Rand_ano_stats <- list(
  f_ratio = flat_out2["F value1"],
  f_pval = flat_out2["Pr(>F)1"]
)
print(Rand_ano_stats)
## [[1]]
## F value1
## 45.01726
##
## [[2]]
## Pr(>F)1
## 2.518676e-09