I\'m trying to extract email addresses from an existing comments field and put it into its own column. The string may be something like this \"this is an example comment wit
You can search for '@'
in the string. Then you get the string at the LEFT
and RIGHT
side of '@'
. You then want to REVERSE
the LEFT
side and get first occurrence of ' '
then get the SUBSTRING
from there. Then REVERSE
it to get the original form. Same principle apply to the RIGHT
side without doing REVERSE
.
Example string: 'some text someemail@domain.org some text'
LEFT
= 'some text someemail'RIGHT
= '@domain.org some text'SUBSTRING
up to the first space = 'liameemos'REVERSE
(4) = someemailSUBSTRING
(2) up to the first space = '@domain.org'Your query would be:
;WITH CteEmail(email) AS(
SELECT 'someemail@domain.org' UNION ALL
SELECT 'some text someemail@domain.org some text' UNION ALL
SELECT 'no email'
)
,CteStrings AS(
SELECT
[Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
[Right] = RIGHT(email, CHARINDEX('@', email, 0) + 1)
FROM CteEmail
WHERE email LIKE '%@%'
)
SELECT *,
REVERSE(
SUBSTRING(Reverse_Left, 0,
CASE
WHEN CHARINDEX(' ', Reverse_Left, 0) = 0 THEN LEN(Reverse_Left) + 1
ELSE CHARINDEX(' ', Reverse_Left, 0)
END
)
)
+
SUBSTRING([Right], 0,
CASE
WHEN CHARINDEX(' ', [Right], 0) = 0 THEN LEN([Right]) + 1
ELSE CHARINDEX(' ', [Right], 0)
END
)
FROM CteStrings
Sample Data:
email
----------------------------------------
someemail@domain.org
some text someemail@domain.org some text
no email
Result
---------------------
someemail@domain.org
someemail@domain.org
This one line would also work (a bit long line though lol):
--declare @a varchar(100)
--set @a = 'a asfd saasd asdfgh@asd.com wqe z zx cxzc '
select substring(substring(@a,0,charindex('@',@a)),len(substring(@a,0,charindex('@',@a)))-charindex(' ',reverse(substring(@a,0,charindex('@',@a))))+2,len(substring(@a,0,charindex('@',@a)))) + substring(substring(@a,charindex('@',@a),len(@a)),0,charindex(' ',substring(@a,charindex('@',@a),len(@a))))
For strings that contain new line characters I modified Felix's answer using PATINDEX
to search for the first control character rather than white space.
I also had to modify the Right
field to subtract the correct amount of text.
WITH CteEmail(email) AS(
SELECT 'example string with new lines
Email: some.example@email.address.com
(first email address - should be returned)
Email: another@test.co.uk
(other email addresses should be ignored
more example text' UNION ALL
SELECT 'Email: some.example@email.address.com' UNION ALL
SELECT 'someemail@domain.org' UNION ALL
SELECT 'some text someemail@domain.org some text' UNION ALL
SELECT 'no email'
)
,CteStrings AS(
SELECT
[Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
[Right] = RIGHT(email, LEN(email) - CHARINDEX('@', email, 0) + 1 )
FROM CteEmail
WHERE email LIKE '%@%'
)
SELECT *,
REVERSE(
SUBSTRING(Reverse_Left, 0,
CASE
WHEN PATINDEX('%[' + CHAR(10)+'- ]%', Reverse_Left) = 0 THEN LEN(Reverse_Left) + 1
ELSE PATINDEX('%[' + CHAR(0)+'- ]%', Reverse_Left)
END
)
)
+
SUBSTRING([Right], 0,
CASE
WHEN PATINDEX('%[' + CHAR(0)+'- ]%', [Right]) = 0 THEN LEN([Right]) + 1
ELSE PATINDEX('%[' + CHAR(0)+'- ]%', [Right])
END
)
FROM CteStrings
DECLARE @t TABLE (row_id INT, email VARCHAR(100))
INSERT @t (row_id, email)
VALUES (1, 'drgkls<ivan@gvi.ru>, info@gvi.com, @ dgh507-16-65@'),
(2, 'hjshfkjshfj@kjs.kjsehf herwfjewr@kjsd.com adjfhja@.com u3483dhj@hhb@.dfj'),
(3, 'kjsdghfjs4254.23detygh@jhjdfg.dgb лдоврывплдоо isgfsi@ klsdfksdl@,dd.com')
DECLARE @pat VARCHAR(100) = '%[^a-z0-9@._ ]%';
WITH f AS (
SELECT row_id,
CAST(' ' + email + ' ' AS VARCHAR(102)) email,
SUBSTRING(email, PATINDEX(@pat, email), 1) bad,
PATINDEX(@pat, email) pat
FROM @t
UNION ALL
SELECT row_id,
CAST(REPLACE(email, bad, ' ') AS VARCHAR(102)),
SUBSTRING(REPLACE(email, bad, ' '), PATINDEX(@pat, REPLACE(email, bad, ' ')), 1) bad,
PATINDEX(@pat, REPLACE(email, bad, ' '))
FROM f
WHERE PATINDEX(@pat, email) > 0
),
s AS
(
SELECT row_id,
email, PATINDEX('%@%', email) pos
FROM f
WHERE pat = 0
AND PATINDEX('%@%', email) > 0
UNION ALL
SELECT row_id,
SUBSTRING(email, pos + 1, 102),
PATINDEX('%@%', SUBSTRING(email, pos + 1, 102))
FROM s
WHERE PATINDEX('%@%', SUBSTRING(email, pos + 1, 102)) > 0
)
SELECT row_id, o1 + pp
FROM s
CROSS APPLY (SELECT REVERSE(LEFT(email, pos -1)) s1) x
CROSS APPLY (SELECT CHARINDEX(' ', s1) i1) y
CROSS APPLY (SELECT REVERSE(LEFT(s1, i1 -1)) o1 WHERE i1 > 0) z
CROSS APPLY (SELECT CHARINDEX(' ', email, pos) i2) e
CROSS APPLY (SELECT SUBSTRING(email, pos, i2 -pos) pp WHERE i2 > pos + 1) q
WHERE LEN(o1) > 1
AND CHARINDEX('.', pp) > 0
AND PATINDEX('%@%@%', pp) = 0
AND PATINDEX('%@.%', pp) = 0
AND PATINDEX('%.', pp) = 0
I know wewesthemenace already answered the question, but his/her solution seems over complicated. Why concatenate the left and right sides of the email address together? I'd rather just find the beginning and the end of the email address and then use substring to return the email address like so:
DECLARE @Table TABLE (comment NVARCHAR(50));
INSERT INTO @Table
VALUES ('blah MyEmailAddress@domain.org'), --At the end
('blah MyEmailAddress@domain.org blah blah'), --In the middle
('MyEmailAddress@domain.org blah'), --At the beginning
('no email');
SELECT comment,
CASE
WHEN CHARINDEX('@',comment) = 0 THEN NULL
ELSE SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail)
END email
FROM @Table
CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ',CHARINDEX('@',comment))) AS A(endOfEmail)
CROSS APPLY (SELECT DATALENGTH(comment)/2 - CHARINDEX(' ',REVERSE(' ' + comment),CHARINDEX('@',REVERSE(' ' + comment))) + 2) AS B(beginningOfEmail)
Results:
comment email
-------------------------------------------------- --------------------------------------------------
blah MyEmailAddress@domain.org MyEmailAddress@domain.org
blah MyEmailAddress@domain.org blah blah MyEmailAddress@domain.org
MyEmailAddress@domain.org blah MyEmailAddress@domain.org
no email NULL
Stephan's answer is great when looking for a single email address in each row.
However, I was running into this error when trying to get multiple email addresses in each row:
Invalid length parameter passed to the LEFT or SUBSTRING function
I used this answer from DBA Stack Exchange to get all of the positions of @
inside the string. It entails a table-valued function that returns the number of positions equal to the number a certain pattern inside the string. I also had to modify the CROSS APPLY
functions to handle multiple email addresses as well.
My Table:
DECLARE @Table TABLE (comment VARCHAR(500));
INSERT INTO @Table (comment)
VALUES ('blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com'),
('blah hello.world@domain.org more'),
('no email')
Table-valued Function:
CREATE FUNCTION dbo.fnFindPatternLocation
(
@string NVARCHAR(MAX),
@term NVARCHAR(255)
)
RETURNS TABLE
AS
RETURN
(
SELECT pos = Number - LEN(@term)
FROM (SELECT Number, Item = LTRIM(RTRIM(SUBSTRING(@string, Number,
CHARINDEX(@term, @string + @term, Number) - Number)))
FROM (SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
FROM sys.all_objects) AS n(Number)
WHERE Number > 1 AND Number <= CONVERT(INT, LEN(@string))
AND SUBSTRING(@term + @string, Number, LEN(@term)) = @term
) AS y);
GO
Query:
SELECT comment, pos, SUBSTRING(comment,beginningOfEmail,endOfEmail-beginningOfEmail) AS email
FROM @Table
CROSS APPLY (SELECT pos FROM dbo.fnFindPatternLocation(comment, '@')) AS A(pos)
CROSS APPLY (SELECT CHARINDEX(' ',comment + ' ', pos)) AS B(endOfEmail)
CROSS APPLY (SELECT pos - CHARINDEX(' ', REVERSE(SUBSTRING(comment, 1, pos))) + 2) AS C(beginningOfEmail)
Results:
comment
---------------------------------------------------------------------------------------------------------
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah hello.world@domain.org more
pos email
--- ------------------------------
26 My.EmailAddress@domain.org
64 someemailaddress@domain.com
95 asdf@gmail.com
17 hello.world@domain.org