I need to retrieve ListingId from the table below based on the search conditions. Kindly suggest the best way to write the query for the conditions below.
Note : ListingId
Use HAVING instead of self joins. It is much more efficient, as it requires no joins and only one table scan. It also means that if there are multiple conditions, each one only requires an additional expression in the HAVING clause rather than an additional join.
e.g. for your second example:
-- Conditional aggregation: one pass over the table, grouped per listing.
-- Each CASE flags rows that satisfy one required (ExtrafieldId, Value) pair;
-- a listing qualifies only when every pair is flagged at least once.
SELECT
    ListingID
FROM [YourTable]
GROUP BY
    ListingID
HAVING
    MAX(CASE WHEN ExtrafieldId = 1 AND Value = 1 THEN 1 ELSE 0 END) = 1
    AND MAX(CASE WHEN ExtrafieldId = 2 AND Value = 7 THEN 1 ELSE 0 END) = 1
ADDENDUM
The efficiency claim above is just plain wrong. I think the HAVING query is slightly easier on the eye, but the self-join query below is much more efficient.
-- Self-join form: one alias of Listing per required (ExtraFieldID, Value) pair.
-- All joins are inner joins, so a ListingID is returned only when every alias
-- finds a matching row. The per-alias filters sit in the ON clauses, which is
-- equivalent to filtering in WHERE for inner joins.
SELECT
    f1.ListingID
FROM Listing AS f1
INNER JOIN Listing AS f2
    ON  f2.ListingID = f1.ListingID
    AND f2.ExtraFieldID = 2
    AND f2.Value = 7
INNER JOIN Listing AS f3
    ON  f3.ListingID = f1.ListingID
    AND f3.ExtraFieldID = 3
    AND f3.Value = ''
INNER JOIN Listing AS f4
    ON  f4.ListingID = f1.ListingID
    AND f4.ExtraFieldID = 4
    AND f4.Value = 1999
WHERE f1.ExtraFieldID = 1
  AND f1.Value = 1
To prove this I ran the following code to test it:
/*******************************************************************************************************
BENCHMARK: conditional aggregation (GROUP BY / HAVING) versus self joins.

1. Builds #Listing with 4 rows (ExtraFieldID 1-4) per listing, filled with random values.
2. Runs each query @Iterations times and stores the elapsed microseconds per run in #Results.
3. Appends the averages of this run to #OverallResults. That table is deliberately NOT dropped, so
   re-running the script in the same session with a different @Listings accumulates a history.
4. Returns two result sets: the figures for this run, and the weighted history per sample size.
*******************************************************************************************************/
SET NOCOUNT ON -- the fill loop issues 4 single-row INSERTs per listing; suppress the per-statement row-count messages

DECLARE @Iterations INT, @Listings INT
/*******************************************************************************************************
SET THE PARAMETERS FOR THE TEST HERE, @Listings IS THE NUMBER OF ListingIDs TO INSERT INTO THE SAMPLE
TABLE. EACH LISTING GETS 4 RECORDS SO 10,000 LISTINGS WILL GENERATE A SAMPLE OF 40,000 RECORDS ETC.
@Iterations IS THE NUMBER OF SELECTS TO PERFORM TO TEST THE PERFORMANCE OF EACH METHOD.
*******************************************************************************************************/
SET @Iterations = 500
SET @Listings = 1000000
/*******************************************************************************************************/
/*******************************************************************************************************/

-- OBJECT_ID('tempdb..#name') only resolves a temp table owned by the current session.
-- A LIKE '#Listing%' search of TempDB.INFORMATION_SCHEMA.TABLES also matches other sessions'
-- temp tables, which would make the DROP below fail with "table does not exist".
IF OBJECT_ID('tempdb..#Listing') IS NOT NULL
BEGIN
    DROP TABLE #Listing
END
CREATE TABLE #Listing (ListingID INT NOT NULL, ExtraFieldID TINYINT NOT NULL, Value VARCHAR(4), PRIMARY KEY (ListingID, ExtraFieldID))

IF OBJECT_ID('tempdb..#Results') IS NOT NULL
BEGIN
    DROP TABLE #Results
END
-- BIGINT so that AVG() over many multi-second runs (stored in microseconds) cannot overflow INT.
CREATE TABLE #Results (GroupBy BIGINT, SelfJoin BIGINT)

-- DATETIME2 + SYSDATETIME(): DATETIME/GETDATE() values are rounded to ~3.33 ms increments, which is
-- too coarse for a microsecond DATEDIFF and records 0 for fast runs on small samples.
DECLARE @i INT, @Time DATETIME2, @Time2 DATETIME2, @t INT

-- #Listing has just been created, so this evaluates to 0; kept so the loop start follows the table contents.
SET @i = ISNULL((SELECT MAX(ListingID) + 1 FROM #Listing), 0)

-- FILL LISTING TABLE WITH RANDOM VALUES
WHILE @i < @Listings
BEGIN
    INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 1, ROUND(RAND() * 4, 0))
    INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 2, ROUND(RAND() * 20, 0))
    INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 3, CASE WHEN ROUND(RAND(), 0) = 0 THEN '' ELSE CONVERT(VARCHAR(4), ROUND(RAND(), 3) * 1000) END)
    INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 4, DATEPART(YEAR, DATEADD(YEAR, (RAND()-1) * 100, GETDATE())))
    SET @i = @i + 1
END

CREATE NONCLUSTERED INDEX #IX_Listing_Value ON #Listing (Value) WITH FILLFACTOR = 100

SET @i = 0

-- PERFORM BOTH METHODS X NUMBER OF TIMES TO GET AN AVERAGE EXECUTION TIME
-- NOTE(review): Value is VARCHAR(4) but is compared with the integer literals 1, 7 and 1999, which
-- implies an implicit conversion of Value in both queries. The predicates are intentionally left
-- exactly as posted so the benchmark measures the queries as written; confirm whether quoted
-- literals ('1', '7', '1999') were intended.
WHILE @i < @Iterations
BEGIN
    SET @Time = SYSDATETIME()

    -- Method 1: single pass with conditional aggregation.
    SELECT  @t = COUNT(*)
    FROM    (   SELECT  ListingID
                FROM    #Listing
                GROUP BY ListingID
                HAVING  COUNT(CASE WHEN ExtrafieldId = 1 AND Value = 1 THEN 1 END) > 0
                AND     COUNT(CASE WHEN ExtrafieldId = 2 AND Value = 7 THEN 1 END) > 0
                AND     COUNT(CASE WHEN ExtrafieldId = 3 AND Value = '' THEN 1 END) > 0
                AND     COUNT(CASE WHEN ExtrafieldId = 4 AND Value = 1999 THEN 1 END) > 0
            ) D

    SET @Time2 = SYSDATETIME()

    -- Method 2: one self join per required (ExtraFieldID, Value) pair.
    SELECT  @t = COUNT(*)
    FROM    (   SELECT  t1.ListingID
                FROM    #Listing AS t1
                        INNER JOIN #Listing AS t2
                            ON t2.ListingID = t1.ListingID
                        INNER JOIN #Listing AS t3
                            ON t3.ListingID = t1.ListingID
                        INNER JOIN #Listing AS t4
                            ON t4.ListingID = t1.ListingID
                WHERE   (t1.ExtraFieldID = 1 AND t1.Value = 1)
                AND     (t2.ExtraFieldID = 2 AND t2.Value = 7)
                AND     (t3.ExtraFieldID = 3 AND t3.Value = '')
                AND     (t4.ExtraFieldID = 4 AND t4.Value = 1999)
            ) D

    -- @Time..@Time2 brackets method 1; @Time2..now brackets method 2.
    INSERT INTO #Results (GroupBy, SelfJoin)
    SELECT  DATEDIFF(MICROSECOND, @Time, @Time2),
            DATEDIFF(MICROSECOND, @Time2, SYSDATETIME())

    SET @i = @i + 1
END

-- Created once per session and kept between runs so results for different sample sizes accumulate.
IF OBJECT_ID('tempdb..#OverallResults') IS NULL
BEGIN
    CREATE TABLE #OverallResults (GroupBy INT NOT NULL, SelfJoin INT NOT NULL, Iterations INT NOT NULL, Listings INT NOT NULL)
END

INSERT INTO #OverallResults (GroupBy, SelfJoin, Iterations, Listings)
SELECT  AVG(GroupBy),
        AVG(SelfJoin),
        COUNT(*),
        @Listings
FROM    #Results

-- RESULTS FOR THIS RUN
-- NULLIF guards the division when the GroupBy average is 0 (yields NULL instead of an error);
-- the DECIMAL targets are wide enough for differences >= 10 s and percentages outside +/-99.99.
SELECT  AVG(GroupBy) [Group By],
        AVG(SelfJoin) [Self Join],
        COUNT(*) [Iterations],
        CONVERT(DECIMAL(18, 4), (AVG(GroupBy) - AVG(SelfJoin)) / 1000000.0) [Difference (Seconds)],
        CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * AVG(SelfJoin) / NULLIF(AVG(GroupBy), 0)))) [Percent Faster]
FROM    #Results

DROP TABLE #Listing
DROP TABLE #Results -- same casing as the CREATE so the name also resolves under a case-sensitive tempdb collation

-- ACCUMULATED RESULTS PER SAMPLE SIZE (AVERAGES WEIGHTED BY THE NUMBER OF ITERATIONS OF EACH RUN)
SELECT  Records,
        Iterations,
        GroupBy [Group By],
        SelfJoin [Self Join],
        CONVERT(DECIMAL(18, 4), (GroupBy - SelfJoin) / 1000000.0) [Difference (Seconds)],
        CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * SelfJoin / NULLIF(GroupBy, 0)))) [Percent Faster]
FROM    (   SELECT  Listings * 4 [Records],
                    SUM(Iterations) [Iterations],
                    -- BIGINT: average microseconds * iterations can exceed the INT range
                    SUM(CAST(GroupBy AS BIGINT) * Iterations) / SUM(Iterations) [GroupBy],
                    SUM(CAST(SelfJoin AS BIGINT) * Iterations) / SUM(Iterations) [SelfJoin]
            FROM    #OverallResults
            GROUP BY Listings
        ) a
This can be run over and over with different variables. I ran this for 100, 1,000, 10,000, 100,000 and 1,000,000 listings, with 500 SELECT statements on each to get an average execution time. This showed that self joining was on average about 60% faster up to 1,000,000 listings, at which point it became 95% faster. The self-join method is clearly the performance winner.