SQL - Select Query for complex dynamic rows

后端未结

关注

 3  1853

I need to retrieve ListingId from the table below based on the search conditions. Please suggest the best way to write the query for the conditions below.

Note : ListingId

相关标签:

3条回答

伪装坚强ぢ

2021-01-24 22:49

-- Return every ListingID that has a matching row for each of the three
-- (ExtraFieldID, Value) search pairs. The table is joined to itself once per
-- search pair, and each alias is responsible for exactly one pair.
--
-- Comments use "-- " (dash dash space): MySQL does not treat "---" as a
-- comment introducer. The pairs are compared column by column instead of
-- with a row-value expression so the statement also runs on engines that do
-- not support (a, b) = (x, y); the result in a WHERE clause is the same.
SELECT
      t1.ListingID
FROM
      TableX AS t1

  INNER JOIN                          -- 2nd copy of the table
      TableX AS t2
    ON
      t2.ListingID = t1.ListingID

  INNER JOIN                          -- 3rd copy of the table
      TableX AS t3
    ON
      t3.ListingID = t1.ListingID

WHERE
                        -- 1st condition
      t1.ExtraFieldID = @ExtraFieldID_search1
  AND t1.Value        = @Value_search1

                        -- 2nd condition
  AND t2.ExtraFieldID = @ExtraFieldID_search2
  AND t2.Value        = @Value_search2

                        -- 3rd condition
  AND t3.ExtraFieldID = @ExtraFieldID_search3
  AND t3.Value        = @Value_search3

If you need 3 conditions, you'll need to join the table to itself one more time (so 3 times in total)

0 讨论(0)

不思量自难忘°

2021-01-24 23:03

Use HAVING instead of self joins. It is much more efficient, as it requires no joins and only one table scan. It also means that each additional condition needs only one more expression in the HAVING clause rather than an additional join.

e.g. for your second example:

-- Single pass over the table: group the rows by listing and keep a listing
-- only if each search pair was seen at least once in its group.
-- COUNT ignores the NULL produced when the CASE does not match, so each
-- COUNT is the number of rows in the group that satisfy that pair.
--
-- The Value literals are quoted because Value is handled as a character
-- column on this page (other queries compare it with ''). Comparing it with
-- an integer literal would convert the column for every row and can raise a
-- conversion error on non-numeric text. Note that the match is now textual:
-- a stored '01' does not equal '1'.
SELECT  ListingID
FROM    [YourTable]
GROUP BY ListingID
HAVING  COUNT(CASE WHEN ExtrafieldId = 1 AND Value = '1' THEN 1 END) > 0
AND     COUNT(CASE WHEN ExtrafieldId = 2 AND Value = '7' THEN 1 END) > 0

ADDENDUM

The efficiency claim above is just plain wrong. I still think the HAVING version is slightly easier on the eye, but the self-join version below is much more efficient.

-- Return every ListingID that has a matching row for all four
-- (ExtraFieldID, Value) pairs. Each alias of Listing covers one pair and the
-- aliases are tied together on ListingID.
--
-- Value is a character column (the third pair compares it with ''), so every
-- literal is written as a string. An integer literal would force the column
-- to be converted for each row, which prevents an index on Value from being
-- used for a seek and fails on non-numeric text. The match is textual, so a
-- stored '01' does not equal '1'.
SELECT  t1.ListingID
FROM    Listing AS t1
        INNER JOIN Listing AS t2
            ON t2.ListingID = t1.ListingID
        INNER JOIN Listing AS t3
            ON t3.ListingID = t1.ListingID
        INNER JOIN Listing AS t4
            ON t4.ListingID = t1.ListingID
WHERE   (t1.ExtraFieldID = 1 AND t1.Value = '1')
AND     (t2.ExtraFieldID = 2 AND t2.Value = '7')
AND     (t3.ExtraFieldID = 3 AND t3.Value = '')
AND     (t4.ExtraFieldID = 4 AND t4.Value = '1999')

To prove this I ran the following code to test it:

DECLARE @Iterations INT, @Listings INT
/*******************************************************************************************************
SET THE PARAMETERS FOR THE TEST HERE, @Listings IS THE NUMBER OF ListingIDs TO INSERT INTO THE SAMPLE
TABLE. EACH LISTING GETS 4 RECORDS SO 10,000 LISTINGS WILL GENERATE A SAMPLE OF 40,000 RECORDS ETC.
@Iterations IS THE NUMBER OF SELECTS TO PERFORM TO TEST THE PERFORMANCE OF EACH METHOD.
*******************************************************************************************************/
SET @Iterations = 500
SET @Listings = 1000000
/*******************************************************************************************************/

-- Drop leftovers from an earlier run in this session.
-- OBJECT_ID('tempdb..#name') resolves the temp table that belongs to the
-- current session. A LIKE '#Listing%' search of tempdb's INFORMATION_SCHEMA
-- can also match a same-named table owned by another session (or a longer
-- name such as #ListingArchive), and the DROP would then fail.
IF OBJECT_ID('tempdb..#Listing') IS NOT NULL
    BEGIN
        DROP TABLE #Listing
    END

-- Sample data: one row per (listing, extra field); Value is stored as text.
CREATE TABLE #Listing (ListingID INT NOT NULL, ExtraFieldID TINYINT NOT NULL, Value VARCHAR(4), PRIMARY KEY (ListingID, ExtraFieldID))

IF OBJECT_ID('tempdb..#Results') IS NOT NULL
    BEGIN
        DROP TABLE #Results
    END

-- One row per iteration: elapsed time of each method as returned by DATEDIFF(MICROSECOND, ...).
CREATE TABLE #Results (GroupBy INT, SelfJoin INT)

DECLARE @i INT, @Time DATETIME, @Time2 DATETIME, @t INT

-- #Listing has just been created and is empty, so numbering starts at 0.
SET @i = 0

-- FILL LISTING TABLE WITH RANDOM VALUES
-- Each listing gets exactly four rows, one per ExtraFieldID 1..4.
WHILE @i < @Listings
    BEGIN
        -- Field 1: whole number 0..4
        INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 1, ROUND(RAND() * 4, 0))
        -- Field 2: whole number 0..20
        INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 2, ROUND(RAND() * 20, 0))
        -- Field 3: empty string about half the time, otherwise a number 0..1000 as text
        INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 3, CASE WHEN ROUND(RAND(), 0) = 0 THEN '' ELSE CONVERT(VARCHAR(4), ROUND(RAND(), 3) * 1000) END)
        -- Field 4: a year within the 100 years up to today
        INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 4, DATEPART(YEAR, DATEADD(YEAR, (RAND()-1) * 100, GETDATE())))

        SET @i = @i + 1
    END

-- Secondary index on Value, built after the load so it is created once over the full data set.
CREATE NONCLUSTERED INDEX #IX_Listing_Value ON #Listing (Value) WITH FILLFACTOR = 100

-- Run both query shapes @Iterations times and record the elapsed time of
-- each, so that the averages taken afterwards smooth out run-to-run noise.
-- NOTE(review): the timestamps come from GETDATE() into DATETIME variables,
-- so a single microsecond delta is only as fine as DATETIME allows; treat the
-- averages, not individual rows, as the meaningful figure.
SET @i = 0
WHILE @i < @Iterations
BEGIN
    -- Start of the GROUP BY / HAVING measurement.
    SET @Time = GETDATE()

    -- Method 1: one grouped pass with a conditional count per search pair.
    -- The outer COUNT(*) only forces the inner query to be fully evaluated.
    SELECT  @t = COUNT(*)
    FROM    (
                SELECT  ListingID
                FROM    #Listing
                GROUP BY ListingID
                HAVING  COUNT(CASE WHEN ExtrafieldId = 1 AND Value = 1 THEN 1 END) > 0
                    AND COUNT(CASE WHEN ExtrafieldId = 2 AND Value = 7 THEN 1 END) > 0
                    AND COUNT(CASE WHEN ExtrafieldId = 3 AND Value = '' THEN 1 END) > 0
                    AND COUNT(CASE WHEN ExtrafieldId = 4 AND Value = 1999 THEN 1 END) > 0
            ) AS GroupedListings

    -- End of method 1 and start of method 2.
    SET @Time2 = GETDATE()

    -- Method 2: the table joined to itself once per search pair.
    SELECT  @t = COUNT(*)
    FROM    (
                SELECT  t1.ListingID
                FROM    #Listing AS t1
                        INNER JOIN #Listing AS t2 ON t2.ListingID = t1.ListingID
                        INNER JOIN #Listing AS t3 ON t3.ListingID = t1.ListingID
                        INNER JOIN #Listing AS t4 ON t4.ListingID = t1.ListingID
                WHERE   (t1.ExtraFieldID = 1 AND t1.Value = 1)
                    AND (t2.ExtraFieldID = 2 AND t2.Value = 7)
                    AND (t3.ExtraFieldID = 3 AND t3.Value = '')
                    AND (t4.ExtraFieldID = 4 AND t4.Value = 1999)
            ) AS JoinedListings

    -- Store both durations; the second one ends "now".
    INSERT INTO #Results (GroupBy, SelfJoin)
    SELECT  DATEDIFF(MICROSECOND, @Time, @Time2),
            DATEDIFF(MICROSECOND, @Time2, GETDATE())

    SET @i = @i + 1
END

-- #OverallResults is intentionally kept between runs in the same session so
-- results for different @Listings sizes accumulate. Create it only when this
-- session does not have it yet. OBJECT_ID resolves the current session's temp
-- table; a LIKE search of tempdb's INFORMATION_SCHEMA could be satisfied by
-- another session's table and skip the CREATE, making the INSERT below fail.
IF OBJECT_ID('tempdb..#OverallResults') IS NULL
    BEGIN
        CREATE TABLE #OverallResults (GroupBy INT NOT NULL, SelfJoin INT NOT NULL, Iterations INT NOT NULL, Listings INT NOT NULL)
    END

-- Append this run's averages. The HAVING guard skips the insert when no
-- iterations were recorded, where AVG would be NULL and violate NOT NULL.
INSERT INTO #OverallResults (GroupBy, SelfJoin, Iterations, Listings)
SELECT  AVG(GroupBy),
        AVG(SelfJoin),
        COUNT(*),
        @Listings
FROM    #Results
HAVING  COUNT(*) > 0

-- Summary of this run only.
-- The DECIMAL targets are wide enough for any INT microsecond value and for
-- a result of exactly 100 percent (self-join average of 0), both of which
-- overflowed DECIMAL(5, 4) / DECIMAL(4, 2). NULLIF turns a zero GROUP BY
-- average into NULL instead of raising a divide-by-zero error.
SELECT  AVG(GroupBy) AS [Group By],
        AVG(SelfJoin) AS [Self Join],
        COUNT(*) AS [Iterations],
        CONVERT(DECIMAL(12, 4), (AVG(GroupBy) - AVG(SelfJoin)) / 1000000.0) AS [Difference (Seconds)],
        CONVERT(DECIMAL(12, 2), 100 * (1 - (1.0 * AVG(SelfJoin) / NULLIF(AVG(GroupBy), 0)))) AS [Percent Faster]
FROM    #Results

-- Per-run tables are no longer needed. The name matches the CREATE TABLE
-- casing so the statement also works when tempdb uses a case-sensitive collation.
DROP TABLE #Listing
DROP TABLE #Results

-- Cumulative summary per sample size: averages across all runs recorded in
-- this session, weighted by the number of iterations in each run.
-- The products are computed as BIGINT because an average of a few seconds
-- (in microseconds) times a few hundred iterations exceeds the INT range.
SELECT  Records,
        Iterations,
        GroupBy AS [Group By],
        SelfJoin AS [Self Join],
        CONVERT(DECIMAL(12, 4), (GroupBy - SelfJoin) / 1000000.0) AS [Difference (Seconds)],
        CONVERT(DECIMAL(12, 2), 100 * (1 - (1.0 * SelfJoin / NULLIF(GroupBy, 0)))) AS [Percent Faster]
FROM    (   SELECT  Listings * 4 AS [Records],
                    SUM(Iterations) AS [Iterations],
                    SUM(CONVERT(BIGINT, GroupBy) * Iterations) / NULLIF(SUM(Iterations), 0) AS [GroupBy],
                    SUM(CONVERT(BIGINT, SelfJoin) * Iterations) / NULLIF(SUM(Iterations), 0) AS [SelfJoin]
            FROM    #OverallResults
            GROUP BY Listings
        ) a

This can be run over and over with different variables. I ran this for 100, 1000, 10000, 100000 and 1000000 listings with 500 select statements on each to get an average execution time and this showed that self joining was on average about 60% faster up until 1,000,000 listings when it became 95% faster. The self join method is clearly the performance winner.

0 讨论(0)

野的像风

2021-01-24 23:08

You can use UNION and DISTINCT quite easily for this. If you're using the ListingId as an input to another query through an IN clause, you don't have to worry about the duplicates; otherwise you can wrap the query like this:

SELECT DISTINCT ListingId FROM (
  SELECT
    ListingId
  ... -- the rest from below
) AS Data

Here's the query to get the listing (with possible duplicates!):

-- Collect the ListingIDs that match ANY of the search pairs, one branch per
-- pair. UNION ALL keeps duplicates, so a listing that matches several pairs
-- is returned once per matching row.
--
-- Each branch tests a single (ExtrafieldId, Value) pair. A branch that asks
-- for ExtrafieldId = 1 AND ExtrafieldId = 2 on the same row can never return
-- anything, because one row holds only one ExtrafieldId.
-- NOTE(review): the second pair (2, 7) is taken from the other examples on
-- this page - confirm it is the intended condition.
-- Value literals are quoted on the assumption that Value is a character
-- column, as in the other queries on this page.
SELECT
  ListingID
FROM
  TABLE_NAME
WHERE
  ExtrafieldId = 1 AND Value = '1'
UNION ALL
SELECT
  ListingID
FROM
  TABLE_NAME
WHERE
  ExtrafieldId = 2 AND Value = '7'
UNION ALL
SELECT
  ListingID
FROM
  TABLE_NAME
WHERE
  ExtrafieldId = 4 AND Value = '1999'

0 讨论(0)