I\'ve seen many different ways to create and populate a numbers table. However, what is the best way to create and populate one? With \"best\" being defined from most to l
This is a repackaging of the accepted answer - but in a way that lets you compare them all to each other for yourself - the top 3 algorithms are compared (and comments explain why other methods are excluded) and you can run against your own setup to see how they each perform with the size of sequence that you desire.
SET NOCOUNT ON;
--
-- Set the count of numbers that you want in your sequence ...
--
DECLARE @NumberOfNumbers int = 10000000;
--
-- Some notes on choosing a useful length for your sequence ...
-- For a sequence of 100 numbers -- winner depends on preference of min/max/avg runtime ... (I prefer PhilKelley algo here - edit the algo so RowSet2 is max RowSet CTE)
-- For a sequence of 1k numbers -- winner depends on preference of min/max/avg runtime ... (Sadly PhilKelley algo is generally lowest ranked in this bucket, but could be tweaked to perform better)
-- For a sequence of 10k numbers -- a clear winner emerges for this bucket
-- For a sequence of 100k numbers -- do not test any looping methods at this size or above ...
-- the previous winner fails, a different method is need to guarantee the full sequence desired
-- For a sequence of 1MM numbers -- the statistics aren't changing much between the algorithms - choose one based on your own goals or tweaks
-- For a sequence of 10MM numbers -- only one of the methods yields the desired sequence, and the numbers are much closer than for smaller sequences
DECLARE @TestIteration int = 0;
DECLARE @MaxIterations int = 10;
DECLARE @MethodName varchar(128);
-- SQL SERVER 2017 Syntax/Support needed
DROP TABLE IF EXISTS #TimingTest
CREATE TABLE #TimingTest (MethodName varchar(128), TestIteration int, StartDate DateTime2, EndDate DateTime2, ElapsedTime decimal(38,0), ItemCount decimal(38,0), MaxNumber decimal(38,0), MinNumber decimal(38,0))
--
-- Conduct the test ...
--
WHILE @TestIteration < @MaxIterations
BEGIN
-- Be sure that the test moves forward
SET @TestIteration += 1;
/* -- This method has been removed, as it is BY FAR, the slowest method
-- This test shows that, looping should be avoided, likely at all costs, if one places a value / premium on speed of execution ...
--
-- METHOD - Fast looping
--
-- Prep for the test
DROP TABLE IF EXISTS [Numbers].[Test];
CREATE TABLE [Numbers].[Test] (Number INT NOT NULL);
-- Method information
SET @MethodName = 'FastLoop';
-- Record the start of the test
INSERT INTO #TimingTest(MethodName, TestIteration, StartDate)
SELECT @MethodName, @TestIteration, GETDATE()
-- Run the algorithm
DECLARE @i INT = 1;
WHILE @i <= @NumberOfNumbers
BEGIN
INSERT INTO [Numbers].[Test](Number) VALUES (@i);
SELECT @i = @i + 1;
END;
ALTER TABLE [Numbers].[Test] ADD CONSTRAINT PK_Numbers_Test_Number PRIMARY KEY CLUSTERED (Number)
-- Record the end of the test
UPDATE tt
SET
EndDate = GETDATE()
FROM #TimingTest tt
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
-- And the stats about the numbers in the sequence
UPDATE tt
SET
ItemCount = results.ItemCount,
MaxNumber = results.MaxNumber,
MinNumber = results.MinNumber
FROM #TimingTest tt
CROSS JOIN (
SELECT COUNT(Number) as ItemCount, MAX(Number) as MaxNumber, MIN(Number) as MinNumber FROM [Numbers].[Test]
) results
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
*/
/* -- This method requires GO statements, which would break the script, also - this answer does not appear to be the fastest *AND* seems to perform "magic"
--
-- METHOD - "Semi-Looping"
--
-- Prep for the test
DROP TABLE IF EXISTS [Numbers].[Test];
CREATE TABLE [Numbers].[Test] (Number INT NOT NULL);
-- Method information
SET @MethodName = 'SemiLoop';
-- Record the start of the test
INSERT INTO #TimingTest(MethodName, TestIteration, StartDate)
SELECT @MethodName, @TestIteration, GETDATE()
-- Run the algorithm
INSERT [Numbers].[Test] values (1);
-- GO --required
INSERT [Numbers].[Test] SELECT Number + (SELECT COUNT(*) FROM [Numbers].[Test]) FROM [Numbers].[Test]
-- GO 14 --will create 16384 total rows
ALTER TABLE [Numbers].[Test] ADD CONSTRAINT PK_Numbers_Test_Number PRIMARY KEY CLUSTERED (Number)
-- Record the end of the test
UPDATE tt
SET
EndDate = GETDATE()
FROM #TimingTest tt
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
-- And the stats about the numbers in the sequence
UPDATE tt
SET
ItemCount = results.ItemCount,
MaxNumber = results.MaxNumber,
MinNumber = results.MinNumber
FROM #TimingTest tt
CROSS JOIN (
SELECT COUNT(Number) as ItemCount, MAX(Number) as MaxNumber, MIN(Number) as MinNumber FROM [Numbers].[Test]
) results
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
*/
--
-- METHOD - Philip Kelley's algo
-- (needs tweaking to match the desired length of sequence in order to optimize its performance, relies more on the coder to properly tweak the algorithm)
--
-- Prep for the test
DROP TABLE IF EXISTS [Numbers].[Test];
CREATE TABLE [Numbers].[Test] (Number INT NOT NULL);
-- Method information
SET @MethodName = 'PhilKelley';
-- Record the start of the test
INSERT INTO #TimingTest(MethodName, TestIteration, StartDate)
SELECT @MethodName, @TestIteration, GETDATE()
-- Run the algorithm
; WITH
RowSet0 as (select 1 as Item union all select 1), -- 2 rows -- We only have to name the column in the first select, the second/union select inherits the column name
RowSet1 as (select 1 as Item from RowSet0 as A, RowSet0 as B), -- 4 rows
RowSet2 as (select 1 as Item from RowSet1 as A, RowSet1 as B), -- 16 rows
RowSet3 as (select 1 as Item from RowSet2 as A, RowSet2 as B), -- 256 rows
RowSet4 as (select 1 as Item from RowSet3 as A, RowSet3 as B), -- 65536 rows (65k)
RowSet5 as (select 1 as Item from RowSet4 as A, RowSet4 as B), -- 4294967296 rows (4BB)
-- Add more RowSetX to get higher and higher numbers of rows
-- Each successive RowSetX results in squaring the previously available number of rows
Tally as (select row_number() over (order by Item) as Number from RowSet5) -- This is what gives us the sequence of integers, always select from the terminal CTE expression
-- Note: testing of this specific use case has shown that making Tally as a sub-query instead of a terminal CTE expression is slower (always) - be sure to follow this pattern closely for max performance
INSERT INTO [Numbers].[Test] (Number)
SELECT o.Number
FROM Tally o
WHERE o.Number <= @NumberOfNumbers
ALTER TABLE [Numbers].[Test] ADD CONSTRAINT PK_Numbers_Test_Number PRIMARY KEY CLUSTERED (Number)
-- Record the end of the test
UPDATE tt
SET
EndDate = GETDATE()
FROM #TimingTest tt
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
-- And the stats about the numbers in the sequence
UPDATE tt
SET
ItemCount = results.ItemCount,
MaxNumber = results.MaxNumber,
MinNumber = results.MinNumber
FROM #TimingTest tt
CROSS JOIN (
SELECT COUNT(Number) as ItemCount, MAX(Number) as MaxNumber, MIN(Number) as MinNumber FROM [Numbers].[Test]
) results
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
--
-- METHOD - Mladen Prajdic answer
--
-- Prep for the test
DROP TABLE IF EXISTS [Numbers].[Test];
CREATE TABLE [Numbers].[Test] (Number INT NOT NULL);
-- Method information
SET @MethodName = 'MladenPrajdic';
-- Record the start of the test
INSERT INTO #TimingTest(MethodName, TestIteration, StartDate)
SELECT @MethodName, @TestIteration, GETDATE()
-- Run the algorithm
INSERT INTO [Numbers].[Test](Number)
SELECT TOP (@NumberOfNumbers) row_number() over(order by t1.number) as N
FROM master..spt_values t1
CROSS JOIN master..spt_values t2
ALTER TABLE [Numbers].[Test] ADD CONSTRAINT PK_Numbers_Test_Number PRIMARY KEY CLUSTERED (Number)
-- Record the end of the test
UPDATE tt
SET
EndDate = GETDATE()
FROM #TimingTest tt
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
-- And the stats about the numbers in the sequence
UPDATE tt
SET
ItemCount = results.ItemCount,
MaxNumber = results.MaxNumber,
MinNumber = results.MinNumber
FROM #TimingTest tt
CROSS JOIN (
SELECT COUNT(Number) as ItemCount, MAX(Number) as MaxNumber, MIN(Number) as MinNumber FROM [Numbers].[Test]
) results
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
--
-- METHOD - Single INSERT
--
-- Prep for the test
DROP TABLE IF EXISTS [Numbers].[Test];
-- The Table creation is part of this algorithm ...
-- Method information
SET @MethodName = 'SingleInsert';
-- Record the start of the test
INSERT INTO #TimingTest(MethodName, TestIteration, StartDate)
SELECT @MethodName, @TestIteration, GETDATE()
-- Run the algorithm
SELECT TOP (@NumberOfNumbers) IDENTITY(int,1,1) AS Number
INTO [Numbers].[Test]
FROM sys.objects s1 -- use sys.columns if you don't get enough rows returned to generate all the numbers you need
CROSS JOIN sys.objects s2 -- use sys.columns if you don't get enough rows returned to generate all the numbers you need
ALTER TABLE [Numbers].[Test] ADD CONSTRAINT PK_Numbers_Test_Number PRIMARY KEY CLUSTERED (Number)
-- Record the end of the test
UPDATE tt
SET
EndDate = GETDATE()
FROM #TimingTest tt
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
-- And the stats about the numbers in the sequence
UPDATE tt
SET
ItemCount = results.ItemCount,
MaxNumber = results.MaxNumber,
MinNumber = results.MinNumber
FROM #TimingTest tt
CROSS JOIN (
SELECT COUNT(Number) as ItemCount, MAX(Number) as MaxNumber, MIN(Number) as MinNumber FROM [Numbers].[Test]
) results
WHERE tt.MethodName = @MethodName
and tt.TestIteration = @TestIteration
END
-- Calculate the timespan for each of the runs
UPDATE tt
SET
ElapsedTime = DATEDIFF(MICROSECOND, StartDate, EndDate)
FROM #TimingTest tt
--
-- Report the results ...
--
SELECT
MethodName, AVG(ElapsedTime) / AVG(ItemCount) as TimePerRecord, CAST(AVG(ItemCount) as bigint) as SequenceLength,
MAX(ElapsedTime) as MaxTime, MIN(ElapsedTime) as MinTime,
MAX(MaxNumber) as MaxNumber, MIN(MinNumber) as MinNumber
FROM #TimingTest tt
GROUP by tt.MethodName
ORDER BY TimePerRecord ASC, MaxTime ASC, MinTime ASC