Extract email address from string using tsql

后端 未结 6 1426
面向向阳花
面向向阳花 2021-01-13 09:01

I\'m trying to extract email addresses from an existing comments field and put it into its own column. The string may be something like this \"this is an example comment wit

相关标签:
6条回答
  • 2021-01-13 09:25

    You can search for '@' in the string. Then you get the string at the LEFT and RIGHT side of '@'. You then want to REVERSE the LEFT side and get first occurrence of ' ' then get the SUBSTRING from there. Then REVERSE it to get the original form. Same principle apply to the RIGHT side without doing REVERSE.

    Example string: 'some text someemail@domain.org some text'

    1. LEFT = 'some text someemail'
    2. RIGHT = '@domain.org some text'
    3. Reverse LEFT = 'liameemos txet emos'
    4. SUBSTRING up to the first space = 'liameemos'
    5. REVERSE(4) = someemail
    6. SUBSTRING (2) up to the first space = '@domain.org'
    7. Combine 5 and 6 = 'someemail@domain.org'

    Your query would be:

    ;WITH CteEmail(email) AS(
        SELECT 'someemail@domain.org' UNION ALL
        SELECT 'some text someemail@domain.org some text' UNION ALL
        SELECT 'no email'
    )
    ,CteStrings AS(
        SELECT
            [Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
            Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
            [Right] = RIGHT(email, CHARINDEX('@', email, 0) + 1)
        FROM CteEmail
        WHERE email LIKE '%@%'
    )
    SELECT *,
        REVERSE(
            SUBSTRING(Reverse_Left, 0, 
                CASE
                    WHEN CHARINDEX(' ', Reverse_Left, 0) = 0 THEN LEN(Reverse_Left) + 1
                    ELSE CHARINDEX(' ', Reverse_Left, 0)
                END
            )
        )
        +
        SUBSTRING([Right], 0,
            CASE
                WHEN CHARINDEX(' ', [Right], 0) = 0 THEN LEN([Right]) + 1
                ELSE CHARINDEX(' ', [Right], 0)
            END
        )
    FROM CteStrings
    

    Sample Data:

    email
    ----------------------------------------
    someemail@domain.org
    some text someemail@domain.org some text
    no email
    

    Result

    ---------------------
    someemail@domain.org
    someemail@domain.org
    
    0 讨论(0)
  • 2021-01-13 09:26

    This one line would also work (a bit long line though lol):

    --declare @a varchar(100) 
    --set @a = 'a asfd saasd asdfgh@asd.com wqe z zx cxzc '
    select substring(substring(@a,0,charindex('@',@a)),len(substring(@a,0,charindex('@',@a)))-charindex(' ',reverse(substring(@a,0,charindex('@',@a))))+2,len(substring(@a,0,charindex('@',@a)))) + substring(substring(@a,charindex('@',@a),len(@a)),0,charindex(' ',substring(@a,charindex('@',@a),len(@a))))
    
    0 讨论(0)
  • 2021-01-13 09:26

    For strings that contain new line characters I modified Felix's answer using PATINDEX to search for the first control character rather than white space.

    I also had to modify the Right field to subtract the correct amount of text.

        WITH CteEmail(email) AS(
            SELECT 'example string with new lines
    
        Email: some.example@email.address.com
    (first email address - should be returned)
    
        Email: another@test.co.uk
    (other email addresses should be ignored
    
    more example text' UNION ALL
            SELECT 'Email: some.example@email.address.com' UNION ALL
            SELECT 'someemail@domain.org' UNION ALL
            SELECT 'some text someemail@domain.org some text' UNION ALL
            SELECT 'no email'
        )
        ,CteStrings AS(
            SELECT
                [Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
                Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
                [Right] = RIGHT(email, LEN(email) - CHARINDEX('@', email, 0) + 1 )
            FROM CteEmail
            WHERE email LIKE '%@%'
        )
        SELECT *,
            REVERSE(
                SUBSTRING(Reverse_Left, 0, 
                    CASE
                        WHEN PATINDEX('%[' + CHAR(10)+'- ]%', Reverse_Left) = 0 THEN LEN(Reverse_Left) + 1
                        ELSE PATINDEX('%[' + CHAR(0)+'- ]%', Reverse_Left)
                    END
                )
            )
            +
            SUBSTRING([Right], 0,
                CASE
                    WHEN PATINDEX('%[' + CHAR(0)+'- ]%', [Right]) = 0 THEN LEN([Right]) + 1
                    ELSE PATINDEX('%[' + CHAR(0)+'- ]%', [Right])
                END
            )
        FROM CteStrings
    
    0 讨论(0)
  • 2021-01-13 09:27
    DECLARE @t TABLE (row_id INT, email VARCHAR(100))
    
    INSERT @t (row_id, email)
    VALUES (1, 'drgkls<ivan@gvi.ru>, info@gvi.com, @ dgh507-16-65@'),
            (2, 'hjshfkjshfj@kjs.kjsehf herwfjewr@kjsd.com adjfhja@.com u3483dhj@hhb@.dfj'),
            (3, 'kjsdghfjs4254.23detygh@jhjdfg.dgb лдоврывплдоо isgfsi@ klsdfksdl@,dd.com')
    
    DECLARE @pat VARCHAR(100) = '%[^a-z0-9@._ ]%';
    
    WITH f AS (
             SELECT    row_id,
                     CAST(' ' + email + ' ' AS VARCHAR(102)) email,
                     SUBSTRING(email, PATINDEX(@pat, email), 1) bad,
                     PATINDEX(@pat, email) pat
             FROM    @t
             UNION ALL
             SELECT    row_id,
                     CAST(REPLACE(email, bad, ' ') AS VARCHAR(102)),
                     SUBSTRING(REPLACE(email, bad, ' '), PATINDEX(@pat, REPLACE(email, bad, ' ')), 1) bad,
                     PATINDEX(@pat, REPLACE(email, bad, ' '))
             FROM    f
             WHERE    PATINDEX(@pat, email) > 0
         ),
         s AS 
         (
             SELECT    row_id,
                     email, PATINDEX('%@%', email) pos 
             FROM    f 
             WHERE    pat = 0
                     AND    PATINDEX('%@%', email) > 0
             UNION ALL
             SELECT    row_id,
                     SUBSTRING(email, pos + 1, 102), 
                     PATINDEX('%@%', SUBSTRING(email, pos + 1, 102))
             FROM    s
             WHERE    PATINDEX('%@%', SUBSTRING(email, pos + 1, 102)) > 0
         )
    
    SELECT  row_id, o1 + pp
    FROM    s   
            CROSS APPLY (SELECT    REVERSE(LEFT(email, pos -1)) s1) x
            CROSS APPLY (SELECT    CHARINDEX(' ', s1) i1) y
            CROSS APPLY (SELECT    REVERSE(LEFT(s1, i1 -1)) o1 WHERE i1 > 0) z
            CROSS APPLY (SELECT    CHARINDEX(' ', email, pos) i2) e
            CROSS APPLY (SELECT    SUBSTRING(email, pos, i2 -pos) pp WHERE    i2 > pos + 1) q
    WHERE    LEN(o1) > 1
            AND CHARINDEX('.', pp) > 0
            AND PATINDEX('%@%@%', pp) = 0
            AND PATINDEX('%@.%', pp) = 0
            AND PATINDEX('%.', pp) = 0
    
    0 讨论(0)
  • 2021-01-13 09:30

    I know wewesthemenace already answered the question, but his/her solution seems over complicated. Why concatenate the left and right sides of the email address together? I'd rather just find the beginning and the end of the email address and then use substring to return the email address like so:

    My Table

    DECLARE @Table TABLE (comment NVARCHAR(50));
    INSERT INTO @Table
    VALUES ('blah MyEmailAddress@domain.org'),            --At the end
            ('blah MyEmailAddress@domain.org blah blah'), --In the middle
            ('MyEmailAddress@domain.org blah'),           --At the beginning
            ('no email');
    

    Actual Query:

    SELECT  comment,        
            CASE
                WHEN CHARINDEX('@',comment) = 0 THEN NULL
                ELSE SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail)
            END email
    FROM @Table
    CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ',CHARINDEX('@',comment))) AS A(endOfEmail)
    CROSS APPLY (SELECT DATALENGTH(comment)/2 - CHARINDEX(' ',REVERSE(' ' + comment),CHARINDEX('@',REVERSE(' ' + comment))) + 2) AS B(beginningOfEmail)
    

    Results:

    comment                                            email
    -------------------------------------------------- --------------------------------------------------
    blah MyEmailAddress@domain.org                     MyEmailAddress@domain.org
    blah MyEmailAddress@domain.org blah blah           MyEmailAddress@domain.org
    MyEmailAddress@domain.org blah                     MyEmailAddress@domain.org
    no email                                           NULL
    
    0 讨论(0)
  • 2021-01-13 09:31

    Stephan's answer is great when looking for a single email address in each row.

    However, I was running into this error when trying to get multiple email addresses in each row:

    Invalid length parameter passed to the LEFT or SUBSTRING function

    I used this answer from DBA Stack Exchange to get all of the positions of @ inside the string. It entails a table-valued function that returns the number of positions equal to the number a certain pattern inside the string. I also had to modify the CROSS APPLY functions to handle multiple email addresses as well.

    My Table:

    DECLARE @Table TABLE (comment VARCHAR(500));
    INSERT INTO @Table (comment)
    VALUES ('blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com'),
           ('blah hello.world@domain.org more'),
           ('no email')
    

    Table-valued Function:

    CREATE FUNCTION dbo.fnFindPatternLocation
    (
        @string NVARCHAR(MAX),
        @term   NVARCHAR(255)
    )
    RETURNS TABLE
    AS
        RETURN 
        (
            SELECT pos = Number - LEN(@term) 
            FROM (SELECT Number, Item = LTRIM(RTRIM(SUBSTRING(@string, Number, 
            CHARINDEX(@term, @string + @term, Number) - Number)))
            FROM (SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
            FROM sys.all_objects) AS n(Number)
            WHERE Number > 1 AND Number <= CONVERT(INT, LEN(@string))
            AND SUBSTRING(@term + @string, Number, LEN(@term)) = @term
        ) AS y);
    GO
    

    Query:

    SELECT comment, pos, SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail) AS email
    FROM @Table
    CROSS APPLY (SELECT pos FROM dbo.fnFindPatternLocation(comment, '@')) AS A(pos)
    CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ', pos)) AS B(endOfEmail)
    CROSS APPLY (SELECT pos - CHARINDEX(' ', REVERSE(SUBSTRING(comment, 1, pos))) + 2) AS C(beginningOfEmail)
    

    Results:

    comment
    ---------------------------------------------------------------------------------------------------------
    blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
    blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
    blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
    blah hello.world@domain.org more
    

    pos    email
    ---    ------------------------------
    26     My.EmailAddress@domain.org
    64     someemailaddress@domain.com
    95     asdf@gmail.com
    17     hello.world@domain.org
    
    0 讨论(0)
提交回复
热议问题