问题
This query works as intended, but it's just really slow. Does anyone here have recommendations to improve performance?
I essentially just creating a temp table to store all the table and column names, and cycling through them via a WHILE statement, to create dynamic inserts into another table with the details I want.
My latest run took about 21 minutes, which isn't entirely terrible (considering the task), but I'd love to get some input on how/where it can be fine tuned.
USE <DATABASE>;
IF NOT EXISTS(SELECT *
FROM sys.schemas WHERE name='temp')
BEGIN
EXEC ('CREATE SCHEMA temp');
END;
IF OBJECT_ID('temp.columns') IS NOT NULL
BEGIN
DROP TABLE temp.columns
END;
SELECT [table_name]
, [column_name]
, [data_type]
, [is_nullable]
, [numeric_scale]
, [ordinal_position]
INTO [temp].[columns]
FROM information_schema.columns c
WHERE table_schema = 'dbo'
-- AND table_name = 'CONTACTS'
;
IF OBJECT_ID('_TableColumnsUsed') IS NOT NULL
BEGIN
DROP TABLE _TableColumnsUsed
END;
CREATE TABLE _TableColumnsUsed (Table_Name VARCHAR(255) NULL, Column_Position INT, Column_Name VARCHAR(255) NULL, Min_Value VARCHAR(MAX) NULL, Max_Value VARCHAR(MAX) NULL);
DECLARE
@CurrentTable VARCHAR(255)
, @CurrentColumn VARCHAR(255)
, @CurrentIsNullable VARCHAR(3)
, @CurrentNumeric BIT
, @CurrentPosition INT
, @SQL VARCHAR(MAX);
WHILE
(
SELECT COUNT(1)
FROM temp.columns
) > 0
BEGIN
SELECT TOP 1 @CurrentTable = [Table_Name]
, @CurrentColumn = [Column_Name]
, @CurrentIsNullable = [is_nullable]
, @CurrentNumeric = IIF([numeric_scale] IS NULL, 0, 1)
, @CurrentPosition = [ordinal_position]
FROM temp.columns c
WHERE [table_name] NOT IN ('_TableColumnsUsed')
ORDER BY [table_name]
, [ordinal_position];
SET @SQL = 'INSERT INTO _TableColumnsUsed (Table_Name, Column_Position, Column_Name, Min_Value, Max_Value)
SELECT Table_Name = '''+@CurrentTable+'''
, Column_Position = '+CAST(@CurrentPosition AS VARCHAR(3))+'
, Column_Name = '''+@CurrentColumn+'''
, Min_Value = MIN(CAST('+@CurrentColumn+' AS VARCHAR(MAX)))
, Max_Value = MAX(CAST('+@CurrentColumn+' AS VARCHAR(MAX)))
FROM '+@CurrentTable+'
WHERE '+IIF(@CurrentIsNullable = 'NO', '1=1',
CASE
WHEN @CurrentNumeric = 0
THEN 'ISNULL(CAST('+@CurrentColumn+' AS VARCHAR(MAX)),'''') <> '''''
WHEN @CurrentNumeric = 1
THEN 'ISNULL('+@CurrentColumn+',0.00) <> 0.00'
ELSE '1=1'
END);
EXEC (@SQL);
DELETE c
FROM [temp].[columns] [c]
WHERE [c].[table_name] = @CurrentTable
AND [c].[column_name] = @CurrentColumn;
END;
/* -- Dynamic SQL Output Example
SELECT Table_Name = 'CONTACTS'
, Column_Position = 17
, Column_Name = 'ZIP'
, Min_Value = MIN(CAST(ZIP AS VARCHAR(MAX)))
, Max_Value = MAX(CAST(ZIP AS VARCHAR(MAX)))
FROM CONTACTS
WHERE 1=1
*/
SELECT Table_Name, Column_Position, Column_Name, Min_Value, Max_Value
FROM _TableColumnsUsed;
回答1:
Try this, it should work in seconds:
DECLARE @cmd NVARCHAR(MAX)=
(
SELECT STUFF(
(
SELECT ' UNION ALL SELECT ''' + c.TABLE_SCHEMA + ''' AS TableSchema '
+ ',''' + c.TABLE_NAME + ''' AS TableName '
+ ',''' + c.COLUMN_NAME + ''' AS ColumnName '
+ ',''' + c.DATA_TYPE + ''' AS ColumnType '
+ ',CAST(MIN(' + QUOTENAME(c.COLUMN_NAME)+') AS NVARCHAR(MAX)) AS MinValue '
+ ',CAST(MAX(' + QUOTENAME(c.COLUMN_NAME)+') AS NVARCHAR(MAX)) AS MaxValue '
+ ' FROM ' + QUOTENAME(c.TABLE_SCHEMA) + '.' + QUOTENAME(c.TABLE_NAME)
+ ' WHERE ' + QUOTENAME(c.COLUMN_NAME) + ' IS NOT NULL'
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.DATA_TYPE IN('bigint','float','int','datetime') --add all types you want to check, be aware of implicit conversions!
FOR XML PATH(''),TYPE
).value('.','nvarchar(max)'),1,10,'')
);
--PRINT @cmd
EXEC(@cmd);
The statement creates an all-in-one UNION ALL
query which is executed via EXEC
You can uncomment the PRINT
to see the executed statement.
回答2:
Can't say that it will run any faster, depends on the size of your database but here's a modded version of a profiling script I created that should meet your needs.
DECLARE @sqlStatement NVARCHAR(MAX) = '';
WITH CTE AS
(
SELECT
SCH.[name] AS [Schema]
,TAB.[name] AS [Table]
,COL.[name] AS [Column]
FROM sys.columns AS COL
JOIN sys.tables AS TAB
ON COL.[object_id] = TAB.[object_id]
JOIN sys.schemas AS SCH
ON TAB.[schema_id] = SCH.[schema_id]
WHERE COL.system_type_id NOT IN (104,240)
)
SELECT @sqlStatement +=
'UNION ALL
SELECT
'''+[Schema]+''' AS [Schema]
,'''+[Table]+''' AS [Table]
,'''+[Column]+''' AS [Column]
,CONVERT(NVARCHAR(MAX),MAX(['+[Column]+'])) AS ColumnMax
,CONVERT(NVARCHAR(MAX),MIN(['+[Column]+'])) AS ColumnMin
FROM ['+[Schema]+'].['+[Table]+']
'
FROM CTE
;
SET @sqlStatement = STUFF(@sqlStatement,1,10,'');
EXEC sp_executesql @sqlStatement;
来源:https://stackoverflow.com/questions/49674844/finding-min-and-max-values-for-all-table-columns