I have a column name Parent and Child in table Example and Below is the Table Data
| Parent |
Here is a cut-down version of the query from a more generic question How to find all connected subgraphs of an undirected graph
The main idea is to treat (Parent,Child) pairs as edges in a graph and traverse all connected edges starting from a given node.
Since the graph is undirectional we build a list of pairs in both directions in CTE_Pairs
at first.
CTE_Recursive
follows the edges of a graph and stops when it detects a loop. It builds a path of visited nodes as a string in IDPath
and stops the recursion if the new node is in the path (has been visited before).
Final CTE_CleanResult
puts all found nodes in one simple list.
CREATE PROCEDURE GetAncestors(@thingID varchar(8000))
AS
BEGIN
SET NOCOUNT ON;
WITH
CTE_Pairs
AS
(
SELECT
CAST(Parent AS varchar(8000)) AS ID1
,CAST(Child AS varchar(8000)) AS ID2
FROM Example
WHERE Parent <> Child
UNION
SELECT
CAST(Child AS varchar(8000)) AS ID1
,CAST(Parent AS varchar(8000)) AS ID2
FROM Example
WHERE Parent <> Child
)
,CTE_Recursive
AS
(
SELECT
ID1 AS AnchorID
,ID1
,ID2
,CAST(',' + ID1 + ',' + ID2 + ',' AS varchar(8000)) AS IDPath
,1 AS Lvl
FROM
CTE_Pairs
WHERE ID1 = @thingID
UNION ALL
SELECT
CTE_Recursive.AnchorID
,CTE_Pairs.ID1
,CTE_Pairs.ID2
,CAST(CTE_Recursive.IDPath + CTE_Pairs.ID2 + ',' AS varchar(8000)) AS IDPath
,CTE_Recursive.Lvl + 1 AS Lvl
FROM
CTE_Pairs
INNER JOIN CTE_Recursive ON CTE_Recursive.ID2 = CTE_Pairs.ID1
WHERE
CTE_Recursive.IDPath NOT LIKE '%,' + CTE_Pairs.ID2 + ',%'
)
,CTE_RecursionResult
AS
(
SELECT AnchorID, ID1, ID2
FROM CTE_Recursive
)
,CTE_CleanResult
AS
(
SELECT AnchorID, ID1 AS ID
FROM CTE_RecursionResult
UNION
SELECT AnchorID, ID2 AS ID
FROM CTE_RecursionResult
)
SELECT ID
FROM CTE_CleanResult
ORDER BY ID
OPTION(MAXRECURSION 0);
END;
The problem with your attempt is filtering at the start. If I'm right, your want to cluster your data (group them all together) by their relationships, either ascendant or descendant, or a mix of them. For example ID 100
has child 101
, which has another child 102
, but 102
has a parent 103
and you want the result to be these four (100, 101, 102, 103
) for any input that is in that set. This is why you can't filter at the start, since you don't have any means of knowing which relationship will be chained throughout another relationship.
Solving this isn't as simple as it seems and you won't be able to solve it with just 1 recursion.
The following is a solution I made long time ago to group all these relationships together. Keep in mind that, for large datasets (over 100k), it might take a while to calculate, since it has to identify all groups first, and select the result at the end.
CREATE PROCEDURE GetAncestors(@thingID INT)
AS
BEGIN
SET NOCOUNT ON
-- Load your data
IF OBJECT_ID('tempdb..#TreeRelationship') IS NOT NULL
DROP TABLE #TreeRelationship
CREATE TABLE #TreeRelationship (
RelationID INT IDENTITY(1,1) PRIMARY KEY NONCLUSTERED,
Parent INT,
Child INT,
GroupID INT)
INSERT INTO #TreeRelationship (
Parent,
Child)
SELECT
Parent = D.Parent,
Child = D.Child
FROM
Example AS D
UNION -- Data has to be loaded in both ways (direct and reverse) for algorithm to work correctly
SELECT
Parent = D.Child,
Child = D.Parent
FROM
Example AS D
-- Start algorithm
IF OBJECT_ID('tempdb..#FirstWork') IS NOT NULL
DROP TABLE #FirstWork
CREATE TABLE #FirstWork (
Parent INT,
Child INT,
ComponentID INT)
CREATE CLUSTERED INDEX CI_FirstWork ON #FirstWork (Parent, Child)
INSERT INTO #FirstWork (
Parent,
Child,
ComponentID)
SELECT DISTINCT
Parent = T.Parent,
Child = T.Child,
ComponentID = ROW_NUMBER() OVER (ORDER BY T.Parent, T.Child)
FROM
#TreeRelationship AS T
IF OBJECT_ID('tempdb..#SecondWork') IS NOT NULL
DROP TABLE #SecondWork
CREATE TABLE #SecondWork (
Component1 INT,
Component2 INT)
CREATE CLUSTERED INDEX CI_SecondWork ON #SecondWork (Component1)
DECLARE @v_CurrentDepthLevel INT = 0
WHILE @v_CurrentDepthLevel < 100 -- Relationships depth level can be controlled with this value
BEGIN
SET @v_CurrentDepthLevel = @v_CurrentDepthLevel + 1
TRUNCATE TABLE #SecondWork
INSERT INTO #SecondWork (
Component1,
Component2)
SELECT DISTINCT
Component1 = t1.ComponentID,
Component2 = t2.ComponentID
FROM
#FirstWork t1
INNER JOIN #FirstWork t2 on
t1.child = t2.parent OR
t1.parent = t2.parent
WHERE
t1.ComponentID <> t2.ComponentID
IF (SELECT COUNT(*) FROM #SecondWork) = 0
BREAK
UPDATE #FirstWork SET
ComponentID = CASE WHEN items.ComponentID < target THEN items.ComponentID ELSE target END
FROM
#FirstWork items
INNER JOIN (
SELECT
Source = Component1,
Target = MIN(Component2)
FROM
#SecondWork
GROUP BY
Component1
) new_components on new_components.source = ComponentID
UPDATE #FirstWork SET
ComponentID = target
FROM #FirstWork items
INNER JOIN(
SELECT
source = component1,
target = MIN(component2)
FROM
#SecondWork
GROUP BY
component1
) new_components ON new_components.source = ComponentID
END
;WITH Groupings AS
(
SELECT
parent,
child,
group_id = DENSE_RANK() OVER (ORDER BY ComponentID DESC)
FROM
#FirstWork
)
UPDATE FG SET
GroupID = IT.group_id
FROM
#TreeRelationship FG
INNER JOIN Groupings IT ON
IT.parent = FG.parent AND
IT.child = FG.child
-- Select the proper result
;WITH IdentifiedGroup AS
(
SELECT TOP 1
T.GroupID
FROM
#TreeRelationship AS T
WHERE
T.Parent = @thingID
)
SELECT DISTINCT
Result = T.Parent
FROM
#TreeRelationship AS T
INNER JOIN IdentifiedGroup AS I ON T.GroupID = I.GroupID
END
You will see that for @thingID
of value 100
, 101
, 102
and 103
the result are these four, and for values 200
, 201
and 202
the results are these three.
I'm pretty sure this isn't an optimal solution, but it gives the correct output and I never had the need to tune it up since it works fast for my requirements.
you can simply use graph processing introduced in SQL Server 2017.
here is an example
https://www.red-gate.com/simple-talk/sql/t-sql-programming/sql-graph-objects-sql-server-2017-good-bad/