问题
array A =
2 3
2 5
4 8
5 6
7 8
I'd like to get the result as 'conidx = [2 3 5 6] and [4 7 8]'.
One of the values of [2 3] exists in the 2nd row,
One of the values of [2 5] exists in the 4th row,
so [2 3], [2 5] and [5 6] are connected,
finally I can get the connected indices as [2 3 5 6].
Otherwise, one of the values of [4 8] exists in the 5th row,
so [4 8] and [7 8] are connected, finally I can get the connected indices as [4 7 8].
[3]<-->[2]<-->[5]<-->[6] and [4]<-->[8]<-->[7]
回答1:
build a graph and use graphconncomp
G = sparse( A(:,1), A(:,2), 1, max(A(:)), max(A(:)) );
G = G + G.'; %' make graph undirected
[S C] = graphconncomp( G ); % find connected components
For those of you who do not have bioinformatics toolbox
My implementation of graphconncomp
in mex:
code of graph_conn_comp.m
function [l c] = graph_conn_comp(sA)
%
% Computing connected components of an undirected graph - assuming sA is symmetric
%
% Usage:
% [l c] = graph_conn_comp(sA);
%
% Inputs:
% sA - sparse adjacency matrix (for directed graph - does not have to be symmetric)
%
% Outputs:
% l - components labels
% c - number of connected components
%
%
% Compile using:
% >> mex -O -largeArrayDims graph_conn_comp_mex.cpp
%
%
sA = spfun(@(x) ones(size(x)),sA);
if ~isequal(sA, sA')
[ii jj] = find(sA);
sA = sparse([ii jj],[jj ii], ones(1, 2*numel(ii)), size(sA,1), size(sA,2));
end
[l c] = graph_conn_comp_mex(sA);
l = double(l); % make it compatible of the rest of Matlab
code for graph_conn_comp_mex.cpp
to be compiled in matlab using
>> mex -largeArrayDims -O graph_conn_comp_mex.cpp
#include "mex.h"
#include <string.h> // for memset
/*
* Computing connected components of an undirected graph - assuming sA is symmetric
*
* Usage:
* [l c] = graph_conn_comp_mex(sA);
*
* Inputs:
* sA - sparse adjacency matrix (for directed graph - does not have to be symmetric)
*
* Outputs:
* l - components labels
* c - number of connected components
*
*
* Compile using:
* >> mex -O -largeArrayDims graph_conn_comp_mex.cpp
*
*/
#line __LINE__ "graph_conn_comp_mex"
#define STR(s) #s
#define ERR_CODE(a,b) a ":" "line_" STR(b)
// inputs
enum {
AIN = 0,
NIN };
// outputs
enum {
LOUT = 0,
COUT,
NOUT };
void
ConnComp(const mxArray* sA, unsigned int* pl, const double* pnc, mwIndex start_node);
mwIndex
FindUnLabeled(unsigned int* pl, mwIndex n);
void mexFunction(
int nout,
mxArray* pout[],
int nin,
const mxArray* pin[]) {
if ( nin != NIN )
mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"must have %d inputs", NIN);
if (nout==0)
return;
if (nout != NOUT )
mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"must have exactly %d output", NOUT);
if ( mxIsComplex(pin[AIN]) || !mxIsSparse(pin[AIN]) )
mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"sA must be real sparse matrix");
if ( mxGetM(pin[AIN]) != mxGetN(pin[AIN]) )
mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"sA must be square matrix");
mwIndex n = mxGetM(pin[AIN]); // number of variables
mwIndex ii(0);
// allocate outputs
pout[LOUT] = mxCreateNumericMatrix(1, n, mxUINT32_CLASS, mxREAL);
unsigned int* pl = (unsigned int*)mxGetData(pout[LOUT]);
memset(pl, 0, n*sizeof(unsigned int)); // set initial labels to zero
unsigned int cl = 0;
// number of components
pout[COUT] = mxCreateDoubleMatrix(1, 1, mxREAL);
double* pnc = mxGetPr(pout[COUT]);
pnc[0] = 0; // number of components
ii = 0;
do {
ConnComp(pin[AIN], pl, pnc, ii); // find conn comp
pnc[0]++;
ii = FindUnLabeled(pl, n);
} while ( ii < n );
}
mwIndex
FindUnLabeled(unsigned int* pl, mwIndex n) {
for ( mwIndex ii(0); ii<n ; ii++ ){
if ( pl[ii]==0 ) {
return ii;
}
}
return n+1;
}
void
ConnComp(const mxArray* sA, unsigned int* pl, const double* pnc, mwIndex start_node) {
mwIndex n = mxGetM(sA);
unsigned int curr_label = (unsigned int)(pnc[0]+1);
mwIndex *stack = (mwIndex*)mxMalloc( (n)*sizeof(mwIndex) );
memset(stack, 0, (n)*sizeof(mwIndex));
mwIndex sp(0); // stack pointer
// put start_node on top of stack after labeling it
pl[start_node]=curr_label;
stack[sp] = start_node;
sp++;
mwIndex* pir = mxGetIr(sA);
mwIndex* pjc = mxGetJc(sA);
mwIndex ii(0), neighbor(0);
while ( sp > 0 ) {
// pop start_label from stack
sp--;
start_node = stack[sp];
for ( ii = pjc[start_node] ; ii < pjc[start_node+1] ; ii++ ) {
neighbor = pir[ii];
if (pl[neighbor]==0) { // unlabeled
pl[neighbor] = curr_label; // label it
// push it into stack
stack[sp] = neighbor;
sp++;
} else {
if (pl[neighbor]!=curr_label)
mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"Got mixed labeling %d <-> %d",pl[neighbor], curr_label);
}
}
}
mxFree(stack);
}
回答2:
For those of you who don't have graphconncomp
and don't want to compile Shai's mex function. I've posted a pure-matlab replacement for graphconncomp
as part of gptoolbox
function [S,C] = conncomp(G)
[p,q,r] = dmperm(G'+speye(size(G)));
S = numel(r)-1;
C = cumsum(full(sparse(1,r(1:end-1),1,1,size(G,1))));
C(p) = C;
end
It's also ~10x faster than graphconncomp
for large, sparse input.
来源:https://stackoverflow.com/questions/16883367/how-to-find-connected-components-in-matlab