I've got a DB that refers to the following URL:
http://en.wikipedia.org/wiki/Herbert_Gr%F6nemeyer
However, it seems that this URL is badly encoded, which causes problems when it is decoded.
%C3%B6 is the proper UTF-8 encoding for ö (o-umlaut). I would assume that %F6 is a byte-for-byte copy of the byte value of the same character in some local encoding (e.g. code page 1252).
Here's some quick'n'dirty code I cobbled together to make sense of this. Thanks to Josip for pointing me in the right direction:
/// <summary>
/// Decodes the percent-escapes (<c>%XX</c>) in <paramref name="input"/>, accepting both
/// UTF-8 escapes (<c>%C3%B6</c>) and legacy single-byte escapes (<c>%F6</c>).
/// </summary>
/// <remarks>
/// The input is scanned once, left to right. Each run of consecutive escapes is collected
/// into bytes and decoded as strict UTF-8; if those bytes are not valid UTF-8, the whole
/// run is decoded with a legacy single-byte encoding instead. Decoded text is never
/// re-scanned, so <c>%2541</c> yields <c>%41</c>, not <c>A</c>. A <c>%</c> that is not
/// followed by two hex digits is copied through unchanged, and <c>+</c> is not turned
/// into a space.
/// </remarks>
/// <param name="input">The escaped text. May be <c>null</c> or empty.</param>
/// <returns>
/// The decoded text, or <paramref name="input"/> itself when it is <c>null</c>, empty,
/// or contains no <c>%</c>.
/// </returns>
private string UrlDecode(string input)
{
    if (string.IsNullOrEmpty(input) || input.IndexOf('%') < 0)
    {
        return input;
    }

    // throwOnInvalidBytes: true, so malformed UTF-8 raises instead of silently becoming U+FFFD.
    var strictUtf8 = new UTF8Encoding(false, true);
    Encoding legacy = null; // resolved only if a non-UTF-8 run is actually encountered

    // Every escape consumes three characters, so this is an upper bound on the run length.
    var escapedBytes = new byte[input.Length / 3];
    var decoded = new StringBuilder(input.Length);

    int position = 0;
    while (position < input.Length)
    {
        // Gather one maximal run of consecutive %XX escapes (upper- or lower-case hex).
        int byteCount = 0;
        while (position + 2 < input.Length
            && input[position] == '%'
            && Uri.IsHexDigit(input[position + 1])
            && Uri.IsHexDigit(input[position + 2]))
        {
            int high = Uri.FromHex(input[position + 1]);
            int low = Uri.FromHex(input[position + 2]);
            escapedBytes[byteCount++] = (byte)((high << 4) | low);
            position += 3;
        }

        if (byteCount == 0)
        {
            // Ordinary character, or a '%' that does not start a well-formed escape.
            decoded.Append(input[position]);
            position++;
            continue;
        }

        try
        {
            decoded.Append(strictUtf8.GetString(escapedBytes, 0, byteCount));
        }
        catch (DecoderFallbackException)
        {
            // Not valid UTF-8: treat the run as legacy single-byte text (e.g. %F6 -> ö).
            if (legacy == null)
            {
                legacy = GetLegacySingleByteEncoding();
            }

            decoded.Append(legacy.GetString(escapedBytes, 0, byteCount));
        }
    }

    return decoded.ToString();
}

/// <summary>
/// Returns Windows-1252 when the runtime provides it, otherwise ISO-8859-1.
/// </summary>
/// <remarks>
/// The two encodings differ only for bytes 0x80-0x9F, so the substitute still decodes
/// accented Latin letters such as 0xF6 (ö) identically.
/// </remarks>
private static Encoding GetLegacySingleByteEncoding()
{
    try
    {
        return Encoding.GetEncoding(1252);
    }
    catch (NotSupportedException)
    {
        // Code page 1252 is not registered on this runtime; use the substitute below.
    }
    catch (ArgumentException)
    {
        // Code page 1252 is not supported on this platform; use the substitute below.
    }

    return Encoding.GetEncoding(28591); // ISO-8859-1
}