SQL - Select Query for complex dynamic rows

后端 未结 3 1855
你的背包
你的背包 2021-01-24 22:12

I need to retrieve ListingId values from the table below based on a set of search conditions. What is the best way to write a query that returns the rows matching the conditions below?

Note : ListingId

3条回答
  •  不思量自难忘°
    2021-01-24 23:03

    Use HAVING instead of self joins. It is much more efficient, as it requires no joins and only one table scan. It also means that each additional condition only requires one more expression in the HAVING clause rather than an additional join.

    e.g. for your second example:

    -- Returns each ListingID that has a row with (ExtrafieldId 1, Value '1')
    -- AND a row with (ExtrafieldId 2, Value '7').
    -- Each COUNT only counts the rows matching one condition (the CASE yields NULL
    -- otherwise, and COUNT ignores NULLs), so "> 0" means "at least one match".
    -- Value holds text (other queries on this data compare it with ''), so it is
    -- compared with string literals: comparing it with an INT literal would make
    -- SQL Server convert every Value to INT, which fails on non-numeric text.
    SELECT  ListingID
    FROM    [YourTable]
    GROUP BY ListingID
    HAVING  COUNT(CASE WHEN ExtrafieldId = 1 AND Value = '1' THEN 1 END) > 0
    AND     COUNT(CASE WHEN ExtrafieldId = 2 AND Value = '7' THEN 1 END) > 0
    

    ADDENDUM

    The efficiency claim above is just plain wrong. I think the HAVING version is slightly easier on the eye, but the self-join version below is much more efficient.

    -- Returns each ListingID that satisfies all four search conditions.
    -- The table is joined to itself once per condition; every alias is restricted
    -- to one (ExtraFieldID, Value) pair, so a ListingID survives only when all
    -- four rows exist.
    -- Value is compared with string literals throughout (it is already compared
    -- with '' for field 3): an INT literal would force SQL Server to convert the
    -- Value column to INT, which fails on non-numeric text and prevents index seeks.
    SELECT  t1.ListingID
    FROM    Listing AS t1
            INNER JOIN Listing AS t2
                ON  t2.ListingID = t1.ListingID
                AND t2.ExtraFieldID = 2
                AND t2.Value = '7'
            INNER JOIN Listing AS t3
                ON  t3.ListingID = t1.ListingID
                AND t3.ExtraFieldID = 3
                AND t3.Value = ''
            INNER JOIN Listing AS t4
                ON  t4.ListingID = t1.ListingID
                AND t4.ExtraFieldID = 4
                AND t4.Value = '1999'
    WHERE   t1.ExtraFieldID = 1
    AND     t1.Value = '1'
    

    To prove this I ran the following code to test it:

    -- Benchmark setup: test parameters plus the two working temp tables.
    DECLARE @Iterations INT, @Listings INT
    /*******************************************************************************************************
    SET THE PARAMETERS FOR THE TEST HERE, @Listings IS THE NUMBER OF ListingIDs TO INSERT INTO THE SAMPLE
    TABLE. EACH LISTING GETS 4 RECORDS SO 10,000 LISTINGS WILL GENERATE A SAMPLE OF 40,000 RECORDS ETC.
    @Iterations IS THE NUMBER OF SELECTS TO PERFORM TO TEST THE PERFORMANCE OF EACH METHOD.
    *******************************************************************************************************/
    SET @Iterations = 500
    SET @Listings = 1000000
    /*******************************************************************************************************/
    /*******************************************************************************************************/

    -- OBJECT_ID('tempdb..#name') resolves only the temp table visible to THIS session.
    -- A LIKE '#name%' search of tempdb metadata also matches other sessions' temp
    -- tables, which would make the DROP below fail.
    IF OBJECT_ID('tempdb..#Listing') IS NOT NULL
        BEGIN
            DROP TABLE #Listing
        END

    -- Sample data: one row per (listing, extra field); Value is stored as text.
    CREATE TABLE #Listing (ListingID INT NOT NULL, ExtraFieldID TINYINT NOT NULL, Value VARCHAR(4), PRIMARY KEY (ListingID, ExtraFieldID))

    IF OBJECT_ID('tempdb..#Results') IS NOT NULL
        BEGIN
            DROP TABLE #Results
        END

    -- One row per benchmark iteration: elapsed time of each method.
    CREATE TABLE #Results (GroupBy INT, SelfJoin INT)
    
    -- Working variables: @i is the loop counter, @Time/@Time2 hold timestamps and
    -- @t receives query results; @Time, @Time2 and @t are not used in this section.
    DECLARE @i INT, @Time DATETIME, @Time2 DATETIME, @t INT
    -- Suppress the "(1 row(s) affected)" message that every single-row INSERT
    -- below would otherwise send to the client.
    SET NOCOUNT ON
    -- Start after the highest existing ListingID, or at 0 when the table is empty.
    SET @i = ISNULL((SELECT MAX(ListingID) + 1 FROM #Listing), 0)
    -- FILL LISTING TABLE WITH RANDOM VALUES (4 ROWS PER LISTING, ONE PER EXTRA FIELD)
    WHILE @i < @Listings
        BEGIN
            -- Field 1: random number 0-4.
            INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 1, ROUND(RAND() * 4, 0))
            -- Field 2: random number 0-20.
            INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 2, ROUND(RAND() * 20, 0))
            -- Field 3: empty string about half the time, otherwise a random number 0-1000.
            INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 3, CASE WHEN ROUND(RAND(), 0) = 0 THEN '' ELSE CONVERT(VARCHAR(4), ROUND(RAND(), 3) * 1000) END)
            -- Field 4: a year between the current year and roughly 100 years earlier.
            INSERT #Listing (ListingID, ExtraFieldID, Value) VALUES (@i, 4, DATEPART(YEAR, DATEADD(YEAR, (RAND()-1) * 100, GETDATE())))

            SET @i = @i + 1
        END

    -- Index on Value is built after the load, with fully packed pages.
    CREATE NONCLUSTERED INDEX #IX_Listing_Value ON #Listing (Value) WITH FILLFACTOR = 100
    
    SET @i = 0
    -- PERFORM BOTH METHODS X NUMBER OF TIMES TO GET AN AVERAGE EXECUTION TIME
    -- Each iteration records one row in #Results with the elapsed time of the
    -- GROUP BY/HAVING query and of the self-join query, in microseconds.
    -- NOTE: GETDATE() returns DATETIME, which is only accurate to about 3.33 ms,
    -- so a single reading is coarse; the figures are meaningful as averages.
    -- Value is VARCHAR(4), so both queries compare it with string literals; an
    -- INT literal would force an implicit conversion of the column on every row.
    -- NOTE(review): timings taken with INT literals are not directly comparable
    -- with timings from this version.
    WHILE @i < @Iterations
        BEGIN
            SET @Time = GETDATE()

            -- Method 1: single pass with conditional counts in HAVING.
            SELECT  @t = COUNT(*)
            FROM    (   SELECT  ListingID
                        FROM    #Listing
                        GROUP BY ListingID
                        HAVING  COUNT(CASE WHEN ExtrafieldId = 1 AND Value = '1' THEN 1 END) > 0
                        AND     COUNT(CASE WHEN ExtrafieldId = 2 AND Value = '7' THEN 1 END) > 0
                        AND     COUNT(CASE WHEN ExtrafieldId = 3 AND Value = '' THEN 1 END) > 0
                        AND     COUNT(CASE WHEN ExtrafieldId = 4 AND Value = '1999' THEN 1 END) > 0
                    ) D

            -- End of method 1 / start of method 2.
            SET @Time2 = GETDATE()

            -- Method 2: one self join per search condition.
            SELECT  @t = COUNT(*)
            FROM    (   SELECT  t1.ListingID
                        FROM    #Listing AS t1
                                INNER JOIN #Listing AS t2
                                    ON t2.ListingID = t1.ListingID
                                INNER JOIN #Listing AS t3
                                    ON t3.ListingID = t1.ListingID
                                INNER JOIN #Listing AS t4
                                    ON t4.ListingID = t1.ListingID
                        WHERE   (t1.ExtraFieldID = 1 AND t1.Value = '1')
                        AND     (t2.ExtraFieldID = 2 AND t2.Value = '7')
                        AND     (t3.ExtraFieldID = 3 AND t3.Value = '')
                        AND     (t4.ExtraFieldID = 4 AND t4.Value = '1999')
                    ) D

            INSERT INTO #Results (GroupBy, SelfJoin)
            SELECT  DATEDIFF(MICROSECOND, @Time, @Time2),
                    DATEDIFF(MICROSECOND, @Time2, GETDATE())

            SET @i = @i + 1
        END
    
    -- Reporting: store this run's averages in #OverallResults (kept across runs in
    -- the same session), show this run's summary, drop the per-run tables, then
    -- show the iteration-weighted averages of all runs grouped by sample size.

    -- OBJECT_ID only sees this session's temp table; a LIKE search of tempdb
    -- metadata could be satisfied by another session's #OverallResults, in which
    -- case the table would not be created here and the INSERT would fail.
    IF OBJECT_ID('tempdb..#OverallResults') IS NULL
        BEGIN
            CREATE TABLE #OverallResults (GroupBy INT NOT NULL, SelfJoin INT NOT NULL, Iterations INT NOT NULL, Listings INT NOT NULL)
        END

    -- Timings are averaged as BIGINT: AVG over INT fails with an arithmetic
    -- overflow once the sum of the microsecond values exceeds the INT range.
    -- HAVING COUNT(*) > 0 skips the insert when no iterations were recorded,
    -- instead of trying to store NULL averages in NOT NULL columns.
    INSERT INTO #OverallResults (GroupBy, SelfJoin, Iterations, Listings)
    SELECT  AVG(CAST(GroupBy AS BIGINT)),
            AVG(CAST(SelfJoin AS BIGINT)),
            COUNT(*),
            @Listings
    FROM    #Results
    HAVING  COUNT(*) > 0

    -- Summary of this run. NULLIF avoids a divide-by-zero when the average
    -- group-by time is 0; the DECIMAL precisions are wide enough for differences
    -- of 10 seconds or more and for percentages outside +/-99.99.
    SELECT  AVG(CAST(GroupBy AS BIGINT)) AS [Group By],
            AVG(CAST(SelfJoin AS BIGINT)) AS [Self Join],
            COUNT(*) AS [Iterations],
            CONVERT(DECIMAL(18, 4), (AVG(CAST(GroupBy AS BIGINT)) - AVG(CAST(SelfJoin AS BIGINT))) / 1000000.0) AS [Difference (Seconds)],
            CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * AVG(CAST(SelfJoin AS BIGINT)) / NULLIF(AVG(CAST(GroupBy AS BIGINT)), 0)))) AS [Percent Faster]
    FROM    #Results

    DROP TABLE #Listing
    DROP TABLE #Results

    -- Results of every run in this session, one row per sample size. Averages of
    -- separate runs are combined weighted by their iteration counts.
    SELECT  Records,
            Iterations,
            GroupBy AS [Group By],
            SelfJoin AS [Self Join],
            CONVERT(DECIMAL(18, 4), (GroupBy - SelfJoin) / 1000000.0) AS [Difference (Seconds)],
            CONVERT(DECIMAL(18, 2), 100 * (1 - (1.0 * SelfJoin / NULLIF(GroupBy, 0)))) AS [Percent Faster]
    FROM    (   SELECT  Listings * 4 AS [Records],
                        SUM(Iterations) AS [Iterations],
                        SUM(CAST(GroupBy AS BIGINT) * Iterations) / NULLIF(SUM(Iterations), 0) AS [GroupBy],
                        SUM(CAST(SelfJoin AS BIGINT) * Iterations) / NULLIF(SUM(Iterations), 0) AS [SelfJoin]
                FROM    #OverallResults
                GROUP BY Listings
            ) a
    

    This script can be run repeatedly with different parameter values. I ran it for 100, 1,000, 10,000, 100,000 and 1,000,000 listings, with 500 SELECT statements for each method per run, to get an average execution time. The self join was on average about 60% faster for every sample size below 1,000,000 listings, and about 95% faster at 1,000,000 listings. The self join method is clearly the performance winner.

提交回复
热议问题