SQL split function that handles strings where the delimiter appears between text qualifiers?

后端 未结 3 761
庸人自扰
庸人自扰 2021-01-25 08:09

There are several SQL split functions, from loop-driven ones to ones using XML commands or even a numbers table. I haven't found one that supports text qualifiers.

Usin

相关标签:
3条回答
  • 2021-01-25 08:53
    -- Strips the surrounding text qualifier (TQ) from a single token.
    -- Defined before udfSplit because udfSplit calls it.
    --
    -- @nvcToken  raw token, possibly padded with spaces and/or wrapped in TQs
    -- @nvcTQ     text-qualifier character; a NULL qualifier never matches,
    --            so the token is only trimmed
    -- Returns    the token with outer whitespace and a leading/trailing TQ
    --            removed. NULL in gives NULL out. Spaces inside a well-formed
    --            TQ pair are preserved.
    CREATE FUNCTION [dbo].[udfRemoveTQFromToken]
    (
        @nvcToken nvarchar(max),
        @nvcTQ nvarchar(1)
    )
    RETURNS nvarchar(max) AS
    BEGIN

        DECLARE @nvcReturn nvarchar(max)
        DECLARE @bitMalformed bit

        --Trim first: spaces inside the TQ pair must survive,
        --only the padding around the whole token is dropped
        SET @nvcReturn = LTRIM(RTRIM(@nvcToken))

        --The LEN guard matters: '' compares equal to a blank TQ, and
        --RIGHT(@nvcReturn, LEN(@nvcReturn) - 1) would then be called with -1
        IF LEN(@nvcReturn) > 0 AND LEFT(@nvcReturn, 1) = @nvcTQ
            BEGIN
                --Opening TQ without a closing TQ means the token is malformed
                SET @bitMalformed = CASE WHEN RIGHT(@nvcReturn, 1) <> @nvcTQ THEN 1 ELSE 0 END

                --Drop the opening TQ
                SET @nvcReturn = RIGHT(@nvcReturn, LEN(@nvcReturn) - 1)

                --Only a malformed token is re-trimmed, to drop spaces that
                --followed the stray opening TQ
                IF @bitMalformed = 1
                    SET @nvcReturn = LTRIM(@nvcReturn)
            END

        --Drop the closing TQ, if any
        IF LEN(@nvcReturn) > 0 AND RIGHT(@nvcReturn, 1) = @nvcTQ
            SET @nvcReturn = LEFT(@nvcReturn, LEN(@nvcReturn) - 1)

        RETURN @nvcReturn
    END

    GO


    -- Splits @nvcString on @nvcDelimiter, ignoring delimiters that appear
    -- between a pair of text qualifiers (@nvcTQ).
    --
    -- @nvcString     text to split; NULL or '' yields no rows
    -- @nvcDelimiter  single-character delimiter
    -- @nvcTQ         single-character text qualifier; NULL or '' means
    --                "no qualifier"
    -- Returns one row per non-empty token. Tokens are trimmed and have their
    -- qualifiers removed; empty tokens are skipped. Every qualifier character
    -- also ends the token collected so far, so text adjacent to a quoted
    -- section becomes a separate token.
    CREATE FUNCTION [dbo].[udfSplit]
    (
        @nvcString nvarchar(max),
        @nvcDelimiter nvarchar(1),
        @nvcTQ nvarchar(1)
    )
    RETURNS @tblTokens TABLE (
                                Token nvarchar(max)
                                )
    AS
    BEGIN

        DECLARE @intCounter int
        DECLARE @intLength int
        --nvarchar(max) so tokens longer than 4000 characters are not truncated
        DECLARE @nvcToken nvarchar(max)
        DECLARE @nvcCurrentChar nvarchar(1)
        --0 while outside a TQ pair, otherwise the position of the opening TQ
        DECLARE @intStart int
        DECLARE @bitFlush bit

        --An empty qualifier would compare equal to a space (padded comparison)
        --and turn every space into a TQ; NULL simply never matches
        IF DATALENGTH(@nvcTQ) = 0
            SET @nvcTQ = NULL

        IF @nvcString <> ''
            BEGIN
                SET @intCounter = 1
                SET @nvcToken = ''
                SET @intStart = 0
                --Loop-invariant, so computed once
                SET @intLength = LEN(@nvcString)

                --Loop through each character of the string
                WHILE @intCounter <= @intLength
                    BEGIN
                        SET @nvcCurrentChar = SUBSTRING(@nvcString, @intCounter, 1)
                        SET @bitFlush = 0

                        IF @nvcCurrentChar = @nvcTQ
                            BEGIN
                                --Keep the TQ on the token so the helper can strip it,
                                --then toggle between inside and outside the TQ pair
                                SET @nvcToken = @nvcToken + @nvcCurrentChar
                                SET @intStart = CASE WHEN @intStart <> 0 THEN 0 ELSE @intCounter END
                                SET @bitFlush = 1
                            END
                        ELSE IF @intStart = 0 AND @nvcCurrentChar = @nvcDelimiter
                            BEGIN
                                --Delimiter outside a TQ pair ends the token
                                SET @bitFlush = 1
                            END
                        ELSE
                            BEGIN
                                --Ordinary character (or a delimiter inside a TQ pair)
                                SET @nvcToken = @nvcToken + @nvcCurrentChar
                            END

                        IF @bitFlush = 1
                            BEGIN
                                SET @nvcToken = dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

                                IF @nvcToken <> ''
                                    BEGIN
                                        INSERT INTO @tblTokens (Token) VALUES (@nvcToken)
                                        SET @nvcToken = ''
                                    END
                            END

                        SET @intCounter = @intCounter + 1
                    END
            END

        --Whatever is left after the last delimiter has not been added yet
        SET @nvcToken = dbo.udfRemoveTQFromToken(@nvcToken, @nvcTQ)

        IF @nvcToken <> ''
            BEGIN
                INSERT INTO @tblTokens (Token) VALUES (@nvcToken)
            END

        RETURN
    END
    
    0 讨论(0)
  • 2021-01-25 08:58

    Here is my solution:

    -- Splits a comma-separated list, treating commas inside a quoted section
    -- as part of the entry.
    --
    -- @input   list to split; NULL or '' yields no rows
    -- Returns  one row per entry, exactly as it appears in @input (quotes and
    --          surrounding spaces are kept). A trailing comma does not
    --          produce an extra empty row.
    --
    -- A quoted section starts at " or ' and ends at the next occurrence of the
    -- SAME character, so an apostrophe inside a double-quoted name no longer
    -- closes the section. Backslash escapes are not interpreted.
    CREATE FUNCTION fnSplitString
    (
        @input nvarchar(MAX) 
    )
    RETURNS @emails TABLE
    (
        email nvarchar(MAX) 
    )
    AS
    BEGIN

    DECLARE @len int = LEN(@input);
    DECLARE @pos int = 1;
    DECLARE @start int = 1;       -- first character of the entry being scanned
    DECLARE @quote nchar(1) = NULL; -- quote character that opened the current section, NULL when outside
    DECLARE @ch nchar(1);

    WHILE (@pos <= @len)
    BEGIN

        SET @ch = SUBSTRING(@input, @pos, 1);

        IF (@quote IS NULL)
        BEGIN
            IF (@ch = '"' OR @ch = '''')
            BEGIN
                -- remember which character has to close the section
                SET @quote = @ch;
            END
            ELSE IF (@ch = ',')
            BEGIN
                INSERT @emails (email) VALUES (SUBSTRING(@input, @start, @pos - @start));
                SET @start = @pos + 1;
            END
        END
        ELSE IF (@ch = @quote)
        BEGIN
            SET @quote = NULL;
        END

        SET @pos = @pos + 1;
    END

    -- emit the final entry unless the input ended on a comma
    IF (@start <> @pos)
    BEGIN
        INSERT @emails (email) VALUES (SUBSTRING(@input, @start, @pos - @start));
    END

    RETURN
    END
    GO
    
    -- Demo: split a sample address list with fnSplitString and list each entry.
    DECLARE @input nvarchar(max);
    SET @input = 'jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>';

    SELECT
        email
    FROM
        fnSplitString(@input)
    
    0 讨论(0)
  • 2021-01-25 09:05

    This is a quick solution, and it is less than perfect: it has no stack, so it will treat a comma inside the quotes as a delimiter.

    -- Splits @List on @Delim and returns one trimmed row per piece.
    --
    -- @Delim   single-character delimiter
    -- @List    text to split (up to 4000 characters); NULL or '' yields no rows
    -- Returns  a single column named value
    --
    -- Text qualifiers are not recognised: a delimiter inside quotes still splits.
    --
    -- The position generator recurses only up to len(@List) instead of a fixed
    -- 4000, so lists of roughly 100 characters or fewer stay within the default
    -- recursion limit. Longer lists still need OPTION (MAXRECURSION 0) on the
    -- calling statement.
    alter function fnSplit
    (
        @Delim char(1),
        @List nvarchar(4000)
    )
    returns table as
    return
        with 
        -- one row per character position in @List
        Strings(PosIdx) as 
        (
            select 1 
            union all 
            select PosIdx + 1 from Strings where PosIdx < len(@List)
        )
        select
            -- from the piece's first character up to (not including) the next delimiter
            ltrim(rtrim(substring(@List, PosIdx, charindex(@Delim, @List + @Delim, PosIdx) - PosIdx))) as value
        from   
            Strings
        where  
            PosIdx <= len(@List)
        -- a piece starts at position 1 and right after every delimiter
        and substring(@Delim + @List, PosIdx, 1) = @Delim 
    go
    -- Demo call; the recursion limit is lifted because the sample list is longer than 100 characters.
    select
        value
    from
        fnSplit(',', 'jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>')
    option (maxrecursion 0)
    
    0 讨论(0)
提交回复
热议问题