I have a string like:
About \\xee\\x80\\x80John F Kennedy\\xee\\x80\\x81\\xe2\\x80\\x99s Assassination . unsolved my
Scan the input string char-by-char and convert the values starting with `\x` (string to `byte[]` and back to string, using a UTF-8 decoder), leaving all other characters unchanged:
/// <summary>
/// Replaces every run of <c>\xHH</c> escapes (backslash, <c>x</c>, two hex digits)
/// in <paramref name="input"/> with the text obtained by decoding the escaped
/// bytes as UTF-8. All other characters are copied through unchanged.
/// </summary>
/// <param name="input">Text that may contain <c>\xHH</c> byte escapes.</param>
/// <returns>The text with the escaped byte runs decoded as UTF-8.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
static string Decode(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    var sb = new StringBuilder(input.Length);
    // Consecutive escapes are collected here and decoded together, because a
    // single character may be encoded as several UTF-8 bytes.
    var bytes = new List<byte>();
    int position = 0;

    while (position < input.Length)
    {
        char c = input[position++];

        // A backslash that is the very last character cannot start an escape;
        // it falls through and is copied literally.
        if (c == '\\' && position < input.Length)
        {
            c = input[position++];

            // Only treat the sequence as a byte escape when two real hex digits
            // follow; otherwise Convert.ToByte would throw on malformed input.
            if (c == 'x'
                && position <= input.Length - 2
                && Uri.IsHexDigit(input[position])
                && Uri.IsHexDigit(input[position + 1]))
            {
                bytes.Add(Convert.ToByte(input.Substring(position, 2), 16));
                position += 2;
            }
            else
            {
                // Not a byte escape: emit pending bytes, then keep the backslash
                // and the character after it exactly as they appeared.
                AppendBytes(sb, bytes);
                sb.Append('\\');
                sb.Append(c);
            }

            continue;
        }

        // Ordinary character: it terminates any pending byte run.
        AppendBytes(sb, bytes);
        sb.Append(c);
    }

    // Flush a byte run that reaches the end of the input.
    AppendBytes(sb, bytes);
    return sb.ToString();
}

/// <summary>
/// Decodes the pending <paramref name="bytes"/> as UTF-8, appends the result to
/// <paramref name="sb"/>, and clears the list. Does nothing when the list is empty.
/// </summary>
/// <param name="sb">Builder that receives the decoded text.</param>
/// <param name="bytes">Pending bytes; emptied after being appended.</param>
private static void AppendBytes(StringBuilder sb, List<byte> bytes)
{
    if (bytes.Count == 0)
    {
        return;
    }

    sb.Append(System.Text.Encoding.UTF8.GetString(bytes.ToArray()));
    bytes.Clear();
}
Output:
About John F Kennedy’s Assassination . unsolved mystery – 45 years later. Over the last decade, a lot of individuals have speculated on conspiracy theories that ...