问题
The aim of this simulation is to evaluate the Type I error rate of the tests under several combinations of factors:
Sample sizes: (10,10), (10,25), (25,25), (25,50), (25,100), (50,25), (50,100), (100,25), (100,100)
Standard deviation ratios: 1.00, 1.50, 2.00, 2.50, 3.00 and 3.50
Distributions: gamma distributions with unequal skewness and with equal skewness
The two-sample tests involved are the pooled-variance t test, the Welch t test and the Mann-Whitney test. I tried to modify some existing code to use the above combination of factors.
########################################
# Normal-distribution simulation: setup
#
# Builds the design grid (sample sizes x standard deviations), fixes the
# simulation constants, and preallocates storage for the p-values and the
# estimated rejection rates.

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Sample-size pairs, one pair per column: row 1 holds the size of the first
# sample, row 2 the size of the second sample.
sample_sizes <- matrix(
  c(
    10, 10,
    10, 25,
    25, 25,
    25, 50,
    25, 100,
    50, 25,
    50, 100,
    100, 25,
    100, 100
  ),
  nrow = 2
)

# Standard-deviation pairs, one pair per column. The design grid below is
# built from sd1/sd2 instead, so this matrix is not used by it.
sds <- matrix(c(4, 4, 6, 4, 8, 4, 10, 4, 12, 4, 14, 4), nrow = 2)

# Standard deviations of the first sample, and the constant standard
# deviation of the second sample.
# NOTE(review): the accompanying description lists six SD ratios up to 3.50
# (sd 14, as in `sds`), but sd1 stops at 12 -- confirm which is intended.
sd1 <- c(4, 6, 8, 10, 12)
sd2 <- rep(4, 5)

# One second-sample SD per design row (9 sample-size pairs per sd1 value).
sds2 <- rep(sd2, each = 9)

# Cross the second-sample sizes with sd1: 9 x 5 = 45 rows.
ss_sds1 <- expand.grid(sample_sizes[2, ], sd1)

# Prepend the matching first-sample sizes (recycled once per sd1 value) and
# append the second-sample SD, giving the full 45-row design.
all_combine <- cbind(rep(sample_sizes[1, ], 5), ss_sds1, sds2)
names(all_combine) <- c("m", "n", "sds1", "sds2")

# Number of simulated data sets per design row.
nSims <- 10000

# Significance level used for every test.
alpha <- 0.05

# Storage for the simulated p-values, one row per simulation.
matrix1_equal <- matrix(0, nSims, 9)
matrix4_unequal <- matrix(0, nSims, 9)
matrix7_mann <- matrix(0, nSims, 9)

# Estimated rejection rate of each test, one entry per design row.
equal1 <- numeric(nrow(all_combine))
unequal4 <- numeric(nrow(all_combine))
mann7 <- numeric(nrow(all_combine))
# Estimate the Type I error rate of each test for every row of the design.
#
# Relies on objects created earlier in the script: all_combine (columns m, n,
# sds1, sds2), nSims, alpha, the p-value matrices matrix1_equal /
# matrix4_unequal / matrix7_mann, and the result vectors equal1 / unequal4 /
# mann7.
#
# seq_len() is used instead of 1:nrow() so that an empty design runs zero
# iterations rather than looping over c(1, 0).
for (ss in seq_len(nrow(all_combine))) {
  # Sample sizes and standard deviations for this design row. The second
  # sample's SD is read from the design instead of being hard-coded, so the
  # sds2 column is actually honoured.
  m <- all_combine[ss, "m"]
  n <- all_combine[ss, "n"]
  sd_x <- all_combine[ss, "sds1"]
  sd_y <- all_combine[ss, "sds2"]

  for (sim in seq_len(nSims)) {
    # Both samples share the mean 5, so the null hypothesis of equal means
    # holds and every rejection is a Type I error.
    x <- rnorm(m, mean = 5, sd = sd_x)
    y <- rnorm(n, mean = 5, sd = sd_y)

    # Each matrix keeps its p-values in a single column (1, 4 and 7); these
    # columns are overwritten for every design row.
    matrix1_equal[sim, 1] <- t.test(x, y, var.equal = TRUE)$p.value
    matrix4_unequal[sim, 4] <- t.test(x, y, var.equal = FALSE)$p.value
    matrix7_mann[sim, 7] <- wilcox.test(x, y)$p.value
  }

  # Proportion of simulations in which each test rejected at level alpha.
  equal1[ss] <- mean(matrix1_equal[, 1] <= alpha)
  unequal4[ss] <- mean(matrix4_unequal[, 4] <= alpha)
  mann7[ss] <- mean(matrix7_mann[, 7] <= alpha)
}

# Attach the estimated rejection rates to the design grid.
nresult <- cbind(all_combine, equal1, unequal4, mann7)

# Persist the whole workspace (design, p-values and results) to disk.
save.image(file = "normal.data")
I am new to R. I have now completed the code for the normal distribution and have to add two more simulations for the gamma distribution using if/else. Can anyone please give some advice on how to change from the normal distribution to the gamma distribution? I am stuck on this part right now.
HELP!! The code above gave me a result of 0.00 several times; I have checked it many times already and still have not spotted any mistake. Please help.
回答1:
This is my current code:
########################################
# Normal-distribution simulation: setup
#
# Builds the design grid (sample sizes x standard deviations), fixes the
# simulation constants, and preallocates storage for the p-values and the
# estimated rejection rates.

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Sample-size pairs, one pair per column: row 1 holds the size of the first
# sample, row 2 the size of the second sample.
sample_sizes <- matrix(
  c(
    10, 10,
    10, 25,
    25, 25,
    25, 50,
    25, 100,
    50, 25,
    50, 100,
    100, 25,
    100, 100
  ),
  nrow = 2
)

# Standard-deviation pairs, one pair per column. The design grid below is
# built from sd1/sd2 instead, so this matrix is not used by it.
sds <- matrix(c(4, 4, 6, 4, 8, 4, 10, 4, 12, 4, 14, 4), nrow = 2)

# Standard deviations of the first sample, and the constant standard
# deviation of the second sample.
# NOTE(review): the accompanying description lists six SD ratios up to 3.50
# (sd 14, as in `sds`), but sd1 stops at 12 -- confirm which is intended.
sd1 <- c(4, 6, 8, 10, 12)
sd2 <- rep(4, 5)

# One second-sample SD per design row (9 sample-size pairs per sd1 value).
sds2 <- rep(sd2, each = 9)

# Cross the second-sample sizes with sd1: 9 x 5 = 45 rows.
ss_sds1 <- expand.grid(sample_sizes[2, ], sd1)

# Prepend the matching first-sample sizes (recycled once per sd1 value) and
# append the second-sample SD, giving the full 45-row design.
all_combine <- cbind(rep(sample_sizes[1, ], 5), ss_sds1, sds2)
names(all_combine) <- c("m", "n", "sds1", "sds2")

# Number of simulated data sets per design row.
nSims <- 10000

# Significance level used for every test.
alpha <- 0.05

# Storage for the simulated p-values, one row per simulation.
matrix1_equal <- matrix(0, nSims, 9)
matrix4_unequal <- matrix(0, nSims, 9)
matrix7_mann <- matrix(0, nSims, 9)

# Estimated rejection rate of each test, one entry per design row.
equal1 <- numeric(nrow(all_combine))
unequal4 <- numeric(nrow(all_combine))
mann7 <- numeric(nrow(all_combine))
# Estimate the Type I error rate of each test for every row of the design.
#
# Relies on objects created earlier in the script: all_combine (columns m, n,
# sds1, sds2), nSims, alpha, the p-value matrices matrix1_equal /
# matrix4_unequal / matrix7_mann, and the result vectors equal1 / unequal4 /
# mann7.
#
# seq_len() is used instead of 1:nrow() so that an empty design runs zero
# iterations rather than looping over c(1, 0).
for (ss in seq_len(nrow(all_combine))) {
  # Sample sizes and standard deviations for this design row. The second
  # sample's SD is read from the design instead of being hard-coded, so the
  # sds2 column is actually honoured.
  m <- all_combine[ss, "m"]
  n <- all_combine[ss, "n"]
  sd_x <- all_combine[ss, "sds1"]
  sd_y <- all_combine[ss, "sds2"]

  for (sim in seq_len(nSims)) {
    # Both samples share the mean 5, so the null hypothesis of equal means
    # holds and every rejection is a Type I error.
    x <- rnorm(m, mean = 5, sd = sd_x)
    y <- rnorm(n, mean = 5, sd = sd_y)

    # Each matrix keeps its p-values in a single column (1, 4 and 7); these
    # columns are overwritten for every design row.
    matrix1_equal[sim, 1] <- t.test(x, y, var.equal = TRUE)$p.value
    matrix4_unequal[sim, 4] <- t.test(x, y, var.equal = FALSE)$p.value
    matrix7_mann[sim, 7] <- wilcox.test(x, y)$p.value
  }

  # Proportion of simulations in which each test rejected at level alpha.
  equal1[ss] <- mean(matrix1_equal[, 1] <= alpha)
  unequal4[ss] <- mean(matrix4_unequal[, 4] <= alpha)
  mann7[ss] <- mean(matrix7_mann[, 7] <= alpha)
}

# Attach the estimated rejection rates to the design grid.
nresult <- cbind(all_combine, equal1, unequal4, mann7)

# Persist the whole workspace (design, p-values and results) to disk.
save.image(file = "normal.data")
回答2:
I edited your code to test for type 1 errors. Instead of having multiple nested for loops for each combination of factors, I prefer to put all of those combinations into a single matrix and do simulations with each row of said matrix. This makes it much easier to plot out the results. To speed up computation, note that I did far fewer simulations (I changed `nSims`), and you would want to change it back. At the end you could then combine your three result vectors with the matrix of factor combinations.
I have no clue what you had going on with `(ss-1)*nsds+sim` and opted to change it.
# Normal-distribution simulation: setup
#
# Builds the design grid (sample sizes x first-sample standard deviation),
# fixes the simulation constants, and preallocates storage.

# Sample-size pairs, one pair per column: row 1 holds the size of the first
# sample, row 2 the size of the second sample.
sample_sizes <- matrix(
  c(
    10, 10,
    10, 25,
    25, 25,
    25, 50,
    25, 100,
    50, 25,
    50, 100,
    100, 25,
    100, 100
  ),
  nrow = 2
)

# Standard deviations tried for the first sample.
sds <- seq(4, 14, by = 2)

# Cross the second-sample sizes with the standard deviations (9 x 6 = 54
# rows) and prepend the matching first-sample sizes, recycled once per
# standard deviation.
all_combn <- cbind(
  rep(sample_sizes[1, ], 6),
  expand.grid(sample_sizes[2, ], sds)
)
names(all_combn) <- c("ss1", "ss2", "sds")

# Seed the random number generator so the results are reproducible.
set.seed(1)

# Number of simulated data sets per design row (kept small for speed).
nSims <- 500

# p-values of the three tests, one row per simulation and one column per test.
store_sim <- matrix(0, nSims, 3)

# Significance level used for every test.
alpha <- 0.05

# Estimated rejection rate of each test, one entry per design row.
equal <- numeric(nrow(all_combn))
unequal <- numeric(nrow(all_combn))
mann <- numeric(nrow(all_combn))
# Estimate the Type I error rate of each test for every row of the design.
#
# Relies on objects created earlier in the script: all_combn (columns ss1,
# ss2, sds), nSims, alpha, the p-value matrix store_sim (one column per test)
# and the result vectors equal / unequal / mann.
#
# seq_len() is used instead of 1:nrow() so that an empty design runs zero
# iterations rather than looping over c(1, 0).
for (ss in seq_len(nrow(all_combn))) {
  # Sample sizes and first-sample standard deviation for this design row.
  m <- all_combn[ss, 1]
  n <- all_combn[ss, 2]
  sd_x <- all_combn[ss, 3]

  for (sim in seq_len(nSims)) {
    # Both samples share the mean 5, so the null hypothesis of equal means
    # holds and every rejection is a Type I error. The second sample always
    # has standard deviation 4.
    x <- rnorm(m, 5, sd_x)
    y <- rnorm(n, 5, 4)

    # Column 1: pooled-variance t test; column 2: Welch t test;
    # column 3: Mann-Whitney (Wilcoxon rank-sum) test.
    store_sim[sim, 1] <- t.test(x, y, var.equal = TRUE)$p.value
    store_sim[sim, 2] <- t.test(x, y, var.equal = FALSE)$p.value
    store_sim[sim, 3] <- wilcox.test(x, y)$p.value
  }

  # Proportion of simulations in which each test rejected at level alpha.
  # The Mann-Whitney rate must come from column 3; reading column 2 would
  # merely duplicate the Welch result.
  equal[ss] <- sum(store_sim[, 1] < alpha) / nSims
  unequal[ss] <- sum(store_sim[, 2] < alpha) / nSims
  mann[ss] <- sum(store_sim[, 3] < alpha) / nSims
}

# Attach the estimated rejection rates to the design grid and preview them.
answer <- cbind(all_combn, equal, unequal, mann)
head(answer)
ss1 ss2 sds equal unequal mann
1 10 10 4 0.070 0.062 0.062
2 10 25 4 0.046 0.048 0.048
3 25 25 4 0.048 0.048 0.048
4 25 50 4 0.038 0.048 0.048
5 25 100 4 0.058 0.054 0.054
6 50 25 4 0.048 0.054 0.054
来源:https://stackoverflow.com/questions/36451225/r-coding-why-show-0-00-in-result