I have a dataset that contains results for many tests across many samples. The samples are replicated within the dataset. I would like to compare the test results between replic
You are on the right track with `t(combn(nrow(data),2))`. See below for how I would do it.
## positions of the columns whose names start with "Test" followed by at least one digit
testCols <- grep("^Test\\d+", colnames(data))
## Compare two sets of test results position by position.
##
## x, y: results to compare, element-wise (e.g. two vectors of equal length,
##       or two one-row data frames with the same columns). NA marks a
##       missing result.
##
## Returns an unnamed vector c(overlaps, matches, non_matches):
##   overlaps    - positions where neither x nor y is NA
##   matches     - overlapping positions where x equals y
##   non_matches - overlapping positions where x differs from y
TestsCompare <- function(x, y) {
  ## how many non-NA values overlap
  both_present <- !is.na(x) & !is.na(y)
  overlaps <- sum(both_present)
  ## of those that overlap, how many match; x == y is NA wherever either side
  ## is NA, so dropping NAs restricts the count to overlapping positions.
  ## TRUE is spelled out because T is an ordinary variable that can be
  ## reassigned, which would silently turn this count into NA.
  matches <- sum(x == y, na.rm = TRUE)
  ## of those that overlap, how many do not match (complement of matches)
  non_matches <- overlaps - matches
  c(overlaps, matches, non_matches)
}
## Compare every pair of rows of x on the test columns.
##
## x:         a data frame (or matrix) with one row per replicate.
## test_cols: columns of x holding the test results; defaults to the
##            testCols variable computed from the column names.
##
## Returns a matrix with one row per pair of rows of x and the columns
## row_a, row_b, overlaps, matches, non_matches (the last three come from
## TestsCompare). When x has fewer than two rows there is nothing to pair,
## so a matrix with zero rows and the same columns is returned.
RowCompare <- function(x, test_cols = testCols) {
  result_cols <- c("row_a", "row_b", "overlaps", "matches", "non_matches")
  n_rows <- nrow(x)
  ## combn() stops with "n < m" when asked for pairs from fewer than two
  ## rows; return an empty result instead so one unreplicated sample does
  ## not abort a whole lapply() over samples.
  if (is.null(n_rows) || n_rows < 2L) {
    return(matrix(integer(0), nrow = 0L, ncol = length(result_cols),
                  dimnames = list(NULL, result_cols)))
  }
  ## one row per unordered pair of row indices
  pairs <- t(combn(n_rows, 2))
  ## preallocate instead of growing the result with rbind() in the loop
  comp <- matrix(NA_integer_, nrow = nrow(pairs), ncol = length(result_cols),
                 dimnames = list(NULL, result_cols))
  for (i in seq_len(nrow(pairs))) {
    row_a <- pairs[i, 1]
    row_b <- pairs[i, 2]
    a_tests <- x[row_a, test_cols]
    b_tests <- x[row_b, test_cols]
    comp[i, ] <- c(row_a, row_b, TestsCompare(a_tests, b_tests))
  }
  comp
}
## apply the pairwise row comparison to every element of data.split
## (presumably the data split by sample -- it is defined outside this snippet)
out <- lapply(X = data.split, FUN = RowCompare)
Produces:
> out
$Sample1
row_a row_b overlaps matches non_matches
[1,] 1 2 8 6 2
$Sample2
row_a row_b overlaps matches non_matches
[1,] 1 2 10 9 1
[2,] 1 3 9 8 1
[3,] 2 3 9 9 0