Find non-ASCII characters in varchar columns using SQL Server

前端 未结 8 1518
遥遥无期
遥遥无期 2020-12-02 14:38

How can rows with non-ASCII characters be returned using SQL Server?
If you could show how to do it for one column, that would be great.

I am doing something like this

相关标签:
8条回答
  • 2020-12-02 15:02

    Try something like this:

    -- Demo data: char(182) stands in for a non-ASCII character (shown as a
    -- pilcrow in the sample output). Row 1 is clean; rows 2-6 put the bad
    -- character in different columns and positions.
    DECLARE @YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20))
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (1, 'ok','ok','ok')
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (2, 'BA'+char(182)+'D','ok','ok')
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (3, 'ok',char(182)+'BAD','ok')
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (4, 'ok','ok','B'+char(182)+'AD')
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (5, char(182)+'BAD','ok',char(182)+'BAD')
    INSERT INTO @YourTable (PK, col1, col2, col3) VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182))

    -- If you have a Numbers table use that; otherwise build one with a recursive CTE.
    -- AllNumbers yields 1..1000, one row per character position to inspect.
    ;WITH AllNumbers AS
    (   SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
            FROM AllNumbers
            WHERE Number < 1000
    )
    -- Each branch joins a column to every position within its length and keeps
    -- the positions whose character code is below 32 or above 127.
    -- UNION (not UNION ALL) is deliberate: a value with several bad characters
    -- matches once per bad position, and the de-duplication collapses those
    -- matches to a single row per (pk, column).
    SELECT
        y.pk, 'Col1' AS BadValueColumn, CONVERT(varchar(20), y.col1) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
        FROM @YourTable           y
            INNER JOIN AllNumbers n ON n.Number <= LEN(y.col1)
        WHERE ASCII(SUBSTRING(y.col1, n.Number, 1)) < 32 OR ASCII(SUBSTRING(y.col1, n.Number, 1)) > 127
    UNION
    SELECT
        y.pk, 'Col2' AS BadValueColumn, CONVERT(varchar(20), y.col2) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
        FROM @YourTable           y
            INNER JOIN AllNumbers n ON n.Number <= LEN(y.col2)
        WHERE ASCII(SUBSTRING(y.col2, n.Number, 1)) < 32 OR ASCII(SUBSTRING(y.col2, n.Number, 1)) > 127
    UNION
    SELECT
        y.pk, 'Col3' AS BadValueColumn, CONVERT(varchar(20), y.col3) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
        FROM @YourTable           y
            INNER JOIN AllNumbers n ON n.Number <= LEN(y.col3)
        WHERE ASCII(SUBSTRING(y.col3, n.Number, 1)) < 32 OR ASCII(SUBSTRING(y.col3, n.Number, 1)) > 127
    -- Sort by name rather than ordinal position, and include the column label so
    -- the order of rows that share a pk is defined instead of incidental.
    ORDER BY pk, BadValueColumn
    -- The CTE recurses 999 times, which exceeds the default limit of 100.
    OPTION (MAXRECURSION 1000)
    

    OUTPUT:

    pk          BadValueColumn BadValue
    ----------- -------------- --------------------
    2           Col1           BA¶D
    3           Col2           ¶BAD
    4           Col3           B¶AD
    5           Col1           ¶BAD
    5           Col3           ¶BAD
    6           Col1           BAD¶
    6           Col2           B¶AD
    6           Col3           BAD¶¶¶
    
    (8 row(s) affected)
    
    0 讨论(0)
  • 2020-12-02 15:03

    Here is a UDF I built to detect columns with extended ASCII characters. It is quick, and you can extend the character set you want to check. The second parameter lets you switch between flagging anything outside the standard character set and allowing an extended set:

    -- Returns 1 when @string contains a character outside the accepted set, else 0.
    --
    -- @string               text to inspect; a NULL input returns 0 because the
    --                       loop condition is never true.
    -- @checkExtendedCharset 1 = also accept the whitelisted extended code-page
    --                       values listed below (tab plus a set of 128-255 codes);
    --                       any other value = accept only codes 32..126.
    --
    -- Note: LEN ignores trailing spaces, so they are not inspected (a space is
    -- code 32 and would be accepted anyway).
    create function [dbo].[udf_ContainsNonASCIIChars]
    (
    @string nvarchar(4000),
    @checkExtendedCharset bit
    )
    returns bit
    as
    begin

        -- SUBSTRING is 1-based, so scanning starts at position 1 and must
        -- include position LEN(@string) to cover the final character.
        declare @pos int = 1;
        declare @len int = len(@string);
        declare @wide nchar(1);   -- current character, kept as Unicode
        declare @code int;        -- Unicode code point of @wide
        declare @narrow int;      -- code-page value of @wide after narrowing to varchar

        while @pos <= @len
        begin
            select @wide = substring(@string, @pos, 1);
            -- Test the Unicode code point, not ASCII() of a narrowed copy:
            -- narrowing turns unmappable characters into '?' (code 63), which
            -- would make them look like plain ASCII.
            select @code = unicode(@wide);

            if @code < 32 or @code > 126
                begin
                    if @checkExtendedCharset = 1
                        begin
                            select @narrow = ascii(convert(varchar(1), @wide));
                            -- Reject when narrowing was lossy (a non-ASCII character
                            -- collapsed to an ASCII one) or when the code-page value
                            -- is not in the whitelist.
                            if (@code > 127 and @narrow < 128)
                               or @narrow not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
                                return 1;
                        end
                    else
                        return 1;
                end

            select @pos = @pos + 1;
        end

        return 0;

    end
    

    USAGE:

    -- Return the addresses that contain characters outside the extended set.
    -- A scalar user-defined function must be invoked with at least a two-part
    -- (schema-qualified) name, hence the dbo. prefix.
    select Address1
    from PropertyFile_English
    where dbo.udf_ContainsNonASCIIChars(Address1, 1) = 1
    
    0 讨论(0)
提交回复
热议问题