get consensus of multiple partitioning methods in R

后端 未结 2 671
野的像风
野的像风 2021-01-22 10:04

My data:

data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste(\"item\",1:3)
rownames(data)=paste(\"method\",1:6)

I wan

相关标签:
2条回答
  • 2021-01-22 10:57

    You can try this, base R:

    res=apply(data,2,function(u) as.numeric(names(sort(table(u), decreasing=T))[1]))
    
    setNames(lapply(unique(res), function(u) names(res)[res==u]), unique(res))
    #$`1`
    #[1] "item 1" "item 2"
    
    #$`2`
    #[1] "item 3"
    
    0 讨论(0)
  • 2021-01-22 10:59

    This function is passed a matrix where each column is an item and each row is a membership vector corresponding to a partition of the items according to a clustering method. The elements (numbers) composing each row have no meaning other than indicating membership and are recycled from row to row. The function returns the majority vote partition. When no consensus exists for an item, the partition given by the first row wins. This allows ordering of the partitions by decreasing values of modularity, for instance.

        consensus.final <-
      function(data){
        output=list()
        for (i in 1:nrow(data)){
          row=as.numeric(data[i,])
          output.inner=list()
          for (j in 1:length(row)){
            group=character()
            group=c(group,colnames(data)[which(row==row[j])])
            output.inner[[j]]=group
          }
          output.inner=unique(output.inner)
          output[[i]]=output.inner
        }
    
        # gives the mode of the vector representing the number of groups found by each method
        consensus.n.comm=as.numeric(names(sort(table(unlist(lapply(output,length))),decreasing=TRUE))[1])
    
        # removes the elements of the list that do not correspond to this consensus solution
        output=output[lapply(output,length)==consensus.n.comm]
    
        # 1) find intersection 
        # 2) use majority vote for elements of each vector that are not part of the intersection
    
        group=list()
    
        for (i in 1:consensus.n.comm){ 
          list.intersection=list()
          for (p in 1:length(output)){
            list.intersection[[p]]=unlist(output[[p]][i])
          }
    
          # candidate group i
          intersection=Reduce(intersect,list.intersection)
          group[[i]]=intersection
    
          # we need to reinforce that group
          for (p in 1:length(list.intersection)){
            vector=setdiff(list.intersection[[p]],intersection)
            if (length(vector)>0){
              for (j in 1:length(vector)){
                counter=vector(length=length(list.intersection))
                for (k in 1:length(list.intersection)){
                  counter[k]=vector[j]%in%list.intersection[[k]]
                }
                if(length(which(counter==TRUE))>=ceiling((length(counter)/2)+0.001)){
                  group[[i]]=c(group[[i]],vector[j])
                }
              }
            }
          }
        }
    
        group=lapply(group,unique)
    
        # variables for which consensus has not been reached
        unclassified=setdiff(colnames(data),unlist(group))
    
        if (length(unclassified)>0){
          for (pp  in 1:length(unclassified)){
            temp=matrix(nrow=length(output),ncol=consensus.n.comm)
            for (i in 1:nrow(temp)){
              for (j in 1:ncol(temp)){
                temp[i,j]=unclassified[pp]%in%unlist(output[[i]][j])
              }
            }
            # use the partition of the first method when no majority exists (this allows ordering of partitions by decreasing modularity values for instance)
            index.best=which(temp[1,]==TRUE)
            group[[index.best]]=c(group[[index.best]],unclassified[pp])
          }
        }
        output=list(group=group,unclassified=unclassified)
      }
    

    Some examples:

    data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
    colnames(data)=paste("item",1:3)
    rownames(data)=paste("method",1:6)
    data
    consensus.final(data)$group
    
    [[1]]
    [1] "item 1" "item 2"
    
    [[2]]
    [1] "item 3"
    
    data=cbind(c(1,1,1,1,1,3),c(1,1,1,1,1,1),c(1,1,1,2,1,2)) 
    colnames(data)=paste("item",1:3) 
    rownames(data)=paste("method",1:6)
    data
    consensus.final(data)$group
    
    [[1]]
    [1] "item 1" "item 2" "item 3"
    
    data=cbind(c(1,3,2,1),c(2,2,3,3),c(3,1,1,2))
    colnames(data)=paste("item",1:3)
    rownames(data)=paste("method",1:4)
    data
    consensus.final(data)$group
    
    [[1]]
    [1] "item 1"
    
    [[2]]
    [1] "item 2"
    
    [[3]]
    [1] "item 3"
    
    0 讨论(0)
提交回复
热议问题