Problem 1:

Using a for loop, write a function to calculate the number of zeroes in a numeric vector

# Count the zeroes in a numeric vector with an explicit for loop.
#
# x: numeric vector (assumed to contain no NA values, since `if` cannot
#    branch on NA).
# Returns the number of elements of x equal to 0.
count_zeroes <- function(x) {
  counter <- 0

  # seq_along() yields an empty sequence for an empty vector, whereas
  # 1:length(x) would iterate over c(1, 0) and fail on x[1].
  for (i in seq_along(x)) {
    if (x[i] == 0) {
      counter <- counter + 1
    }
  }

  counter
}

# 45 Bernoulli draws with success probability 0.9, so zeroes are the rare case.
myvector <- rbinom(n = 45, size = 1, prob = 0.9)
counter <- count_zeroes(myvector)

print(counter)
## [1] 6

Problem 2:

Use subsetting instead of a loop to rewrite the function as a single line of code.

# Logical indexing keeps only the entries equal to zero; the length of that
# subset is the number of zeroes.
myvectorsub <- myvector[
  myvector == 0
]
length(x = myvectorsub)
## [1] 6

Problem 3:

Write a function that takes as input two integers representing the number of rows and columns in a matrix.

# Build an x-by-y matrix in which every cell holds the product of its row
# index and column index, print it, and return it invisibly.
#
# x: number of rows (non-negative integer)
# y: number of columns (non-negative integer)
makemat <- function(x, y) {
  mat <- matrix(nrow = x, ncol = y)

  # seq_len() gives an empty sequence when a dimension is 0; 1:0 would
  # iterate over c(1, 0) and index outside the matrix.
  for (i in seq_len(nrow(mat))) {
    for (j in seq_len(ncol(mat))) {
      mat[i, j] <- i * j
    }
  }

  print(mat)
  # Same value print() handed back before, now made explicit.
  invisible(mat)
}

# Demo call: the row count and the column count are each drawn at random
# from 1 to 10, so the printed matrix changes from run to run.
makemat(x=sample(1:10, 1),y=sample(1:10, 1))
##       [,1] [,2] [,3] [,4]
##  [1,]    1    2    3    4
##  [2,]    2    4    6    8
##  [3,]    3    6    9   12
##  [4,]    4    8   12   16
##  [5,]    5   10   15   20
##  [6,]    6   12   18   24
##  [7,]    7   14   21   28
##  [8,]    8   16   24   32
##  [9,]    9   18   27   36
## [10,]   10   20   30   40

Problem 4: Practice calling custom functions within a for loop

PART A: Create a dataset with 3 groups of data, each group drawn from a distribution with a different mean. The final data frame should have 1 column for group and 1 column for the response variable.

# Three samples of 10 normal draws each (default sd), differing only in mean.
Group1 <- rnorm(n = 10, mean = 5)
Group2 <- rnorm(n = 10, mean = 2)
Group3 <- rnorm(n = 10, mean = 8)

# Long format: one row per observation, with a group label column and a
# response column.
Groups <- rep(c("Group1", "Group2", "Group3"), each = 10)
ResVar <- c(Group1, Group2, Group3)
Group.df <- data.frame(Groups = Groups, ResVar = ResVar)
head(Group.df)
##   Groups   ResVar
## 1 Group1 6.318989
## 2 Group1 4.698730
## 3 Group1 3.166804
## 4 Group1 6.490198
## 5 Group1 5.515674
## 6 Group1 5.477660

PART B: Write a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reshuffle the response variable across observations and return the mean of
# each group in the reshuffled data.
#
# data: data frame with a grouping column `Groups` and a numeric response
#       column `ResVar`. Defaults to the global `Group.df`, so existing
#       zero-argument calls keep working.
# Returns an unnamed numeric vector with one mean per group, ordered by the
# sorted group labels.
shuffle_data <- function(data = Group.df) {
  # Shuffle by index: sample(x) on a single number would sample from 1:x
  # instead of returning that number.
  ShuffResVar <- data$ResVar[sample.int(nrow(data))]

  # Group labels stay in place; only the responses were permuted.
  averages <- tapply(ShuffResVar, data$Groups, mean)

  # Strip the dim/dimnames that tapply() attaches so callers get a plain vector.
  unname(as.vector(averages))
}

# Run a single reshuffle; the returned vector of group means is auto-printed.
shuffle_data()
## [1] 3.873219 6.489461 5.021049

PART C: Use a for loop to repeat the function from Part B 100 times. Store the results in a data frame that has 1 column indicating the replicate number and 1 column for each new group mean, for a total of 4 columns.

# Preallocate a results table with one row per replicate, then fill row i
# with the replicate number and the three reshuffled group means.

n_sim <- 100

# Columns are sized from n_sim so the table always matches the loop length.
# They are created inside data.frame() so the original Group1/Group2/Group3
# sample vectors are not overwritten.
Randomization <- data.frame(
  Replicate = rep(NA_real_, n_sim),
  Group1 = rep(NA_real_, n_sim),
  Group2 = rep(NA_real_, n_sim),
  Group3 = rep(NA_real_, n_sim)
)

for (i in seq_len(n_sim)) {
  # shuffle_data() returns the group means of one reshuffle.
  temp1 <- shuffle_data()

  Randomization[i, ] <- c(i, temp1)
}

head(Randomization)
##   Replicate   Group1   Group2   Group3
## 1         1 5.343009 4.553961 5.486759
## 2         2 5.467254 5.078507 4.837968
## 3         3 4.882255 5.844317 4.657157
## 4         4 6.084798 4.673599 4.625332
## 5         5 5.124287 5.772177 4.487266
## 6         6 5.531774 4.437212 5.414743

PART D: Use qplot() to create a histogram of the means for each reshuffled group. How do the distributions of reshuffled means compare to the original means?

library(ggplot2)

# Histogram of the reshuffled Group1 means. The fill and outline colours are
# constants, so they are set on the layer rather than mapped inside aes().
g1 <- ggplot(data = Randomization, mapping = aes(x = Group1)) +
  geom_histogram(fill = "tomato", color = "black")
print(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the reshuffled Group2 means. The fill and outline colours are
# constants, so they are set on the layer rather than mapped inside aes().
g2 <- ggplot(data = Randomization, mapping = aes(x = Group2)) +
  geom_histogram(fill = "tomato", color = "black")
print(g2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the reshuffled Group3 means. The fill and outline colours are
# constants, so they are set on the layer rather than mapped inside aes().
g3 <- ggplot(data = Randomization, mapping = aes(x = Group3)) +
  geom_histogram(fill = "tomato", color = "black")
print(g3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Problem 5: Statistical Analysis

Standard Statistical Analysis of Group Means:

# One-way ANOVA of the response on group membership for the original data.
ano_model <- aov(ResVar ~ Groups, data = Group.df)
z <- summary(ano_model)

# Flatten the summary table into a named vector so individual statistics can
# be picked out by name.
flat_out <- unlist(z)

# `=` names the list elements. `<-` inside list() would leave them unnamed
# and create stray global variables as a side effect.
ano_stats <- list(f_ratio = flat_out["F value1"],
                  f_pval = flat_out["Pr(>F)1"])

print(ano_stats)
## $f_ratio
## F value1 
## 45.01726 
## 
## $f_pval
##      Pr(>F)1 
## 2.518676e-09

p-values from randomization test

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.6     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Reshape the replicate table from wide (one column per group) to long
# (one row per replicate-group pair) so it can be used in a model formula.
Rand_Anova <- Randomization %>%
  pivot_longer(
    cols = Group1:Group3,
    names_to = "Groups",
    values_to = "ResVar"
  )
head(Rand_Anova)
## # A tibble: 6 × 3
##   Replicate Groups ResVar
##       <dbl> <chr>   <dbl>
## 1         1 Group1   5.34
## 2         1 Group2   4.55
## 3         1 Group3   5.49
## 4         2 Group1   5.47
## 5         2 Group2   5.08
## 6         2 Group3   4.84
# One-way ANOVA of the reshuffled group means on group label.
Rand_ano_model <- aov(ResVar ~ Groups, data = Rand_Anova)

# Summarise the randomization model itself. Summarising `ano_model` here
# would just repeat the statistics of the original-data ANOVA.
z2 <- summary(Rand_ano_model)
flat_out2 <- unlist(z2)

# `=` names the list elements. `<-` inside list() would leave them unnamed
# and create stray global variables as a side effect.
Rand_ano_stats <- list(f_ratio = flat_out2["F value1"],
                       f_pval = flat_out2["Pr(>F)1"])

# NOTE(review): any output rendered for this chunk before this fix repeated
# the original-data ANOVA; re-knit the document to refresh it.
print(Rand_ano_stats)
## [[1]]
## F value1 
## 45.01726 
## 
## [[2]]
##      Pr(>F)1 
## 2.518676e-09