get consensus of multiple partitioning methods in R

后端未结

关注

 2  671

My data:

data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste(\"item\",1:3)
rownames(data)=paste(\"method\",1:6)

I wan

相关标签:

2条回答

一整个雨季

2021-01-22 10:57

You can try this, base R:

res=apply(data,2,function(u) as.numeric(names(sort(table(u), decreasing=T))[1]))

setNames(lapply(unique(res), function(u) names(res)[res==u]), unique(res))
#$`1`
#[1] "item 1" "item 2"

#$`2`
#[1] "item 3"

0 讨论(0)

我在风中等你

2021-01-22 10:59

This function is passed a matrix where each column is an item and each row is a membership vector corresponding to a partition of the items according to a clustering method. The elements (numbers) composing each row have no meaning other than indicating membership and are recycled from row to row. The function returns the majority vote partition. When no consensus exists for an item, the partition given by the first row wins. This allows ordering of the partitions by decreasing values of modularity, for instance.

    consensus.final <-
  function(data){
    output=list()
    for (i in 1:nrow(data)){
      row=as.numeric(data[i,])
      output.inner=list()
      for (j in 1:length(row)){
        group=character()
        group=c(group,colnames(data)[which(row==row[j])])
        output.inner[[j]]=group
      }
      output.inner=unique(output.inner)
      output[[i]]=output.inner
    }

    # gives the mode of the vector representing the number of groups found by each method
    consensus.n.comm=as.numeric(names(sort(table(unlist(lapply(output,length))),decreasing=TRUE))[1])

    # removes the elements of the list that do not correspond to this consensus solution
    output=output[lapply(output,length)==consensus.n.comm]

    # 1) find intersection 
    # 2) use majority vote for elements of each vector that are not part of the intersection

    group=list()

    for (i in 1:consensus.n.comm){ 
      list.intersection=list()
      for (p in 1:length(output)){
        list.intersection[[p]]=unlist(output[[p]][i])
      }

      # candidate group i
      intersection=Reduce(intersect,list.intersection)
      group[[i]]=intersection

      # we need to reinforce that group
      for (p in 1:length(list.intersection)){
        vector=setdiff(list.intersection[[p]],intersection)
        if (length(vector)>0){
          for (j in 1:length(vector)){
            counter=vector(length=length(list.intersection))
            for (k in 1:length(list.intersection)){
              counter[k]=vector[j]%in%list.intersection[[k]]
            }
            if(length(which(counter==TRUE))>=ceiling((length(counter)/2)+0.001)){
              group[[i]]=c(group[[i]],vector[j])
            }
          }
        }
      }
    }

    group=lapply(group,unique)

    # variables for which consensus has not been reached
    unclassified=setdiff(colnames(data),unlist(group))

    if (length(unclassified)>0){
      for (pp  in 1:length(unclassified)){
        temp=matrix(nrow=length(output),ncol=consensus.n.comm)
        for (i in 1:nrow(temp)){
          for (j in 1:ncol(temp)){
            temp[i,j]=unclassified[pp]%in%unlist(output[[i]][j])
          }
        }
        # use the partition of the first method when no majority exists (this allows ordering of partitions by decreasing modularity values for instance)
        index.best=which(temp[1,]==TRUE)
        group[[index.best]]=c(group[[index.best]],unclassified[pp])
      }
    }
    output=list(group=group,unclassified=unclassified)
  }

Some examples:

data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2"

[[2]]
[1] "item 3"

data=cbind(c(1,1,1,1,1,3),c(1,1,1,1,1,1),c(1,1,1,2,1,2)) 
colnames(data)=paste("item",1:3) 
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2" "item 3"

data=cbind(c(1,3,2,1),c(2,2,3,3),c(3,1,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:4)
data
consensus.final(data)$group

[[1]]
[1] "item 1"

[[2]]
[1] "item 2"

[[3]]
[1] "item 3"

0 讨论(0)