Please read before marking as duplicate
I have not been able to create or find a RegEx that works for all IPv6 formats (my test cases are b
As far as my research shows, there is no RegEx that works for all IPv6 formats. Even if there is one, it is so complex that it is hard to read and maintain. Besides, it may cause performance problems too. Hence I decided to write a method (function) for this. You can easily add any special cases you wish. I have written it in C#, but I think you can convert this algorithm to any language:
/// <summary>
/// Validates textual IPv6 addresses without relying on one monolithic regular expression.
/// </summary>
/// <remarks>
/// Accepted forms: the full eight-group notation, a single "::" zero compression,
/// a dotted-quad IPv4 tail in place of the last two groups, and an optional
/// "%zone" or "/prefix" suffix attached to the last group. The text of the suffix
/// itself is deliberately not validated.
/// </remarks>
class IPv6Validator
{
    // An IPv6 address is eight groups; each group is written as 1-4 hex digits.
    private const int TotalGroups = 8;
    private const int MaxGroupLength = 4;

    // Sentinel returned by CountGroups when any group is malformed.
    private const int Invalid = -1;

    // Characters that start a trailing zone id ("%eth0") or prefix length ("/64").
    private static readonly char[] SuffixMarkers = { '%', '/' };

    // Dotted-quad matcher. Anchored with \z rather than $ so that a trailing
    // newline after the last octet is not silently tolerated.
    private static readonly Regex IPv4Pattern = new Regex(
        @"^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\z");

    /// <summary>
    /// Determines whether <paramref name="maybeIPv6"/> is a well-formed IPv6 address.
    /// </summary>
    /// <param name="maybeIPv6">Candidate text; may be null or empty.</param>
    /// <returns>
    /// true when the text is a valid address (optionally followed by a "%..." or
    /// "/..." suffix on its last group); false otherwise, including for null.
    /// </returns>
    public bool IsIPv6(string maybeIPv6)
    {
        if (string.IsNullOrEmpty(maybeIPv6))
        {
            return false;
        }

        int lastColon = maybeIPv6.LastIndexOf(':');
        if (lastColon < 0)
        {
            return false;
        }

        // A suffix is only honoured inside the last colon-separated group. A marker
        // that appears earlier stays in the address and fails the hex check below.
        int suffixStart = maybeIPv6.IndexOfAny(SuffixMarkers, lastColon + 1);
        string address = suffixStart < 0 ? maybeIPv6 : maybeIPv6.Substring(0, suffixStart);

        int compression = IndexOfDoubleColon(address, 0);
        if (compression < 0)
        {
            // Without "::" every one of the eight groups must be spelled out.
            return CountGroups(address, true) == TotalGroups;
        }

        // Searching again from the next character rejects both a second "::" and ":::".
        if (IndexOfDoubleColon(address, compression + 1) >= 0)
        {
            return false;
        }

        string head = address.Substring(0, compression);
        string tail = address.Substring(compression + 2);
        int headCount = head.Length == 0 ? 0 : CountGroups(head, false);
        int tailCount = tail.Length == 0 ? 0 : CountGroups(tail, true);
        if (headCount == Invalid || tailCount == Invalid)
        {
            return false;
        }

        // "::" stands for at least one omitted group, so at most seven may remain.
        return headCount + tailCount < TotalGroups;
    }

    // Returns the index of the first "::" at or after start, or -1 when there is none.
    private static int IndexOfDoubleColon(string text, int start)
    {
        for (int i = start; i + 1 < text.Length; i++)
        {
            if (text[i] == ':' && text[i + 1] == ':')
            {
                return i;
            }
        }
        return -1;
    }

    // Counts the 16-bit groups in a ':'-separated run that contains no "::".
    // A dotted-quad IPv4 address in the final position counts as two groups when
    // allowIPv4Tail is set. Returns Invalid if any group is malformed.
    private static int CountGroups(string text, bool allowIPv4Tail)
    {
        string[] groups = text.Split(':');
        int count = 0;
        for (int i = 0; i < groups.Length; i++)
        {
            bool isLast = i == groups.Length - 1;
            if (isLast && allowIPv4Tail && IPv4Pattern.IsMatch(groups[i]))
            {
                count += 2;
            }
            else if (IsHexGroup(groups[i]))
            {
                count++;
            }
            else
            {
                return Invalid;
            }
        }
        return count;
    }

    // True when group consists of one to four hexadecimal digits.
    private static bool IsHexGroup(string group)
    {
        if (group.Length == 0 || group.Length > MaxGroupLength)
        {
            return false;
        }
        foreach (char ch in group)
        {
            bool isHex = (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
            if (!isHex)
            {
                return false;
            }
        }
        return true;
    }
}
I also added other methods, such as how to search for IPv6 addresses in a text or a file. You can check: Regular expression that matches valid IPv6 addresses
Edit Summary: IPv4-mapped addresses and special characters are now covered, e.g. "::123.23.23.23", "fe80::3%eth0", "::ffff:192.1.56.10/96".
`::` is a valid IPv6 address (the all-zeroes address), so why not accept it?
And if you don't want to accept IPv6 addresses with the last 32 bits written in IPv4 notation (why wouldn't you? They are valid address representations), then just remove the last part of the regex that deals with them (starting with `::(ffff`).
Anyway, the regex does indeed contain a few errors in the IPv4-notation part. The IPv4 notation is just a different way to write the last 32 bits of the IPv6 address, and the regex doesn't handle all valid variants of that. Besides, it even forgets to escape the `.`, so it will also accept many invalid strings.
With much help from @nhahtdh in this answer https://stackoverflow.com/a/21943960/3112803 I have found breaking it up to be the best solution. Below is an example of how to do it in PL/SQL, but it could be done this way in other languages. I'll do the same in ColdFusion. For PL/SQL the pattern needed to stay under 512 characters, so breaking it up works great and is simple to understand. It passed all my test cases in the original question.
if (
/* IPv6 expanded */
REGEXP_LIKE(v, '\A[[:xdigit:]]{1,4}(:[[:xdigit:]]{1,4}){7}\z')
/* IPv6 shorthand */
OR (NOT REGEXP_LIKE(v, '\A(.*?[[:xdigit:]](:|\z)){8}')
AND REGEXP_LIKE(v, '\A([[:xdigit:]]{1,4}(:[[:xdigit:]]{1,4}){0,6})?::([[:xdigit:]]{1,4}(:[[:xdigit:]]{1,4}){0,6})?\z'))
/* IPv6 dotted-quad notation, expanded */
OR REGEXP_LIKE(v, '\A[[:xdigit:]]{1,4}(:[[:xdigit:]]{1,4}){5}:(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])(\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}\z')
/* IPv6 dotted-quad notation, shorthand */
OR (NOT REGEXP_LIKE(v, '\A(.*?[[:xdigit:]]:){6}')
AND REGEXP_LIKE(v, '\A([[:xdigit:]]{1,4}(:[[:xdigit:]]{1,4}){0,4})?::([[:xdigit:]]{1,4}:){0,5}(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])(\.(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])){3}\z'))
) then