Check If the string contains accented characters in SQL?

前端 未结 3 1795
无人及你
无人及你 2020-11-27 23:26

I want to perform a task if the input string contain any accented characters else do another task in SQL. Is there any way to check this condition in SQL ?

Eg:

相关标签:
3条回答
  • 2020-11-27 23:54

    SQL Fiddle: http://sqlfiddle.com/#!6/9eecb7d/1607

    declare @a nvarchar(32) = 'àéêöhello!'
    declare @b nvarchar(32) = 'aeeohello!'
    
    select case 
        when (cast(@a as varchar(32)) collate SQL_Latin1_General_Cp1251_CS_AS) = @a 
        then 0 
        else 1 
    end HasSpecialChars
    
    select case 
        when (cast(@b as varchar(32)) collate SQL_Latin1_General_Cp1251_CS_AS) = @b 
        then 0 
        else 1 
    end HasSpecialChars
    

    (based on solution here: How can I remove accents on a string?)

    0 讨论(0)
  • 2020-11-28 00:04

    I use this function to get text (mainly foreign family names) to a more comparable plain latin (with some indiv goodies - feel free to implement your own...).

    Maybe you can use this too. Just compare, if your string is the same as the one passed back.

    CREATE FUNCTION [dbo].[GetRunningNumbers](@anzahl INT=1000000, @StartAt INT=0)
    RETURNS TABLE
    AS 
    RETURN
        SELECT TOP (ISNULL(@anzahl,1000000)) ROW_NUMBER() OVER(ORDER BY A) -1 + ISNULL(@StartAt,0) AS Nmbr
        FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblA(A)
            ,(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblB(B)
            ,(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblC(C)
            ,(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblD(D)
            ,(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblE(E)
            ,(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tblF(F);
    
    GO
    CREATE FUNCTION [dbo].[GetTextPlainLatin]
    (
         @Txt VARCHAR(MAX)
        ,@CaseSensitive BIT
        ,@KeepNumbers BIT
        ,@NonCharReplace VARCHAR(100),@MinusReplace VARCHAR(100) 
        ,@PercentReplace VARCHAR(100),@UnderscoreReplace VARCHAR(100) --for SQL-Masks
        ,@AsteriskReplace VARCHAR(100),@QuestionmarkReplace VARCHAR(100) --for SQL-Masks (Access-Style)
    )
    RETURNS VARCHAR(MAX)
    AS
    BEGIN
    
    DECLARE @txtTransformed VARCHAR(MAX)=(SELECT LTRIM(RTRIM(CASE WHEN ISNULL(@CaseSensitive,0)=0 THEN LOWER(@Txt) ELSE @Txt END)));
    RETURN
    (
        SELECT Repl.ASCII_Code
        FROM dbo.GetRunningNumbers(LEN(@txtTransformed),1) AS pos
        --ASCII-Codes of all characters in your text
        CROSS APPLY(SELECT ASCII(SUBSTRING(@txtTransformed,pos.Nmbr,1)) AS ASCII_Code) AS OneChar  
        --re-code 
        CROSS APPLY
        (
            SELECT CASE 
                WHEN OneChar.ASCII_Code BETWEEN ASCII('A') AND ASCII('Z') THEN  CHAR(OneChar.ASCII_Code)
                WHEN OneChar.ASCII_Code BETWEEN ASCII('a') AND ASCII('z') THEN  CHAR(OneChar.ASCII_Code)
                WHEN OneChar.ASCII_Code BETWEEN ASCII('0') AND ASCII('9') AND @KeepNumbers=1 THEN CHAR(OneChar.ASCII_Code)
                WHEN OneChar.ASCII_Code = ASCII('ƒ') THEN 'f'
                WHEN OneChar.ASCII_Code = ASCII('Š') THEN 'S'
                WHEN OneChar.ASCII_Code = ASCII('š') THEN 's'
                WHEN OneChar.ASCII_Code = ASCII('ß') THEN 'ss'
                WHEN OneChar.ASCII_Code = ASCII('Ä') THEN 'Ae'
                WHEN OneChar.ASCII_Code = ASCII('ä') THEN 'ae'
                WHEN OneChar.ASCII_Code = ASCII('Æ') THEN 'Ae'
                WHEN OneChar.ASCII_Code = ASCII('æ') THEN 'ae'
                WHEN OneChar.ASCII_Code = ASCII('Ö') THEN 'Oe'
                WHEN OneChar.ASCII_Code = ASCII('ö') THEN 'oe'
                WHEN OneChar.ASCII_Code = ASCII('Œ') THEN 'Oe'
                WHEN OneChar.ASCII_Code = ASCII('œ') THEN 'oe'
                WHEN OneChar.ASCII_Code = ASCII('Ü') THEN 'Ue'
                WHEN OneChar.ASCII_Code = ASCII('ü') THEN 'ue'
                WHEN OneChar.ASCII_Code = ASCII('Ž') THEN 'Z'
                WHEN OneChar.ASCII_Code = ASCII('ž') THEN 'z'
                WHEN OneChar.ASCII_Code = ASCII('×') THEN 'x'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('À') AND ASCII('Å') THEN  'A'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('à') AND ASCII('å') THEN  'a'
                WHEN OneChar.ASCII_Code = ASCII('Ç') THEN 'C'
                WHEN OneChar.ASCII_Code = ASCII('ç') THEN 'c'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('È') AND ASCII('Ë') THEN  'E'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('è') AND ASCII('ë') THEN  'e'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('Ì') AND ASCII('Ï') THEN  'I'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('ì') AND ASCII('ï') THEN  'i'
                WHEN OneChar.ASCII_Code = ASCII('Ð') THEN 'D' --island Eth
                WHEN OneChar.ASCII_Code = ASCII('ð') THEN 'd' --island eth
                WHEN OneChar.ASCII_Code = ASCII('Ñ') THEN 'N'
                WHEN OneChar.ASCII_Code = ASCII('ñ') THEN 'n'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('Ò') AND ASCII('Ö') THEN 'O'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('ò') AND ASCII('ö') THEN 'o'
                WHEN OneChar.ASCII_Code = ASCII('Ø') THEN 'O'
                WHEN OneChar.ASCII_Code = ASCII('ø') THEN 'o'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('Ù') AND ASCII('Ü') THEN  'U'
                WHEN OneChar.ASCII_Code BETWEEN ASCII('ù') AND ASCII('ü') THEN  'u'
                WHEN OneChar.ASCII_Code = ASCII('Ý') THEN 'Y'
                WHEN OneChar.ASCII_Code = ASCII('ý') THEN 'y'
                WHEN OneChar.ASCII_Code = ASCII('Þ') THEN 'Th' --island Thorn
                WHEN OneChar.ASCII_Code = ASCII('þ') THEN 'th' --island thorn
                WHEN OneChar.ASCII_Code = ASCII('Ÿ') THEN 'Y'
                WHEN OneChar.ASCII_Code = ASCII('ÿ') THEN 'y'
                --Special with "minus"
                WHEN OneChar.ASCII_Code = ASCII('-') THEN ISNULL(@MinusReplace,ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code)))
                --Special with mask characters
                WHEN OneChar.ASCII_Code = ASCII('%') THEN ISNULL(@PercentReplace,ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code)))
                WHEN OneChar.ASCII_Code = ASCII('_') THEN ISNULL(@UnderscoreReplace,ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code)))
                WHEN OneChar.ASCII_Code = ASCII('*') THEN ISNULL(@AsteriskReplace,ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code)))
                WHEN OneChar.ASCII_Code = ASCII('?') THEN ISNULL(@QuestionmarkReplace,ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code)))
                --replace others        
                ELSE ISNULL(@NonCharReplace,CHAR(OneChar.ASCII_Code))
            END AS ASCII_Code
        ) AS Repl    
        FOR XML PATH(''),TYPE
    ).value('.','varchar(max)');
    END
    GO
    
    SELECT dbo.GetTextPlainLatin('Case sensitive ÄÖ àéêöhello!',1,1,NULL,NULL,NULL,NULL,NULL,NULL);
    SELECT dbo.GetTextPlainLatin('Case in-sensitive ÄÖ àéêöhello!',0,1,NULL,NULL,NULL,NULL,NULL,NULL);
    GO
    DROP FUNCTION dbo.GetTextPlainLatin
    GO
    DROP FUNCTION dbo.GetRunningNumbers;
    
    0 讨论(0)
  • 2020-11-28 00:08

    A Crude way is to check ASCII(<each character>) >= 128 for each character. Something like this

    DECLARE @MyString NVARCHAR(100)
    SET @MyString = N'àéêöhello!'
    
    ;WITH N as 
    (
    SELECT 1 r UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 
    ), Numbers
    as
    (
    SELECT RN = ROW_NUMBER()OVER(ORDER BY N1.r) FROM N as N1 CROSS JOIN N as N2
    )
    SELECT MAX(CASE WHEN ASCII(SUBSTRING(@MyString,RN,1)) >= 128 THEN 1 ELSE 0 END) ContainsAccentedChars FROM Numbers
    WHERE RN <= LEN(@MyString)
    
    0 讨论(0)
提交回复
热议问题