I also needed to parse the data URI scheme. As a result, I improved the regular expression given on this page specifically for C#, so that it fits any data URI scheme (to check the scheme, you can take it from here or here).
Here is my solution for C#:
/// <summary>
/// Holds the individual components of a parsed data URI, as filled in by
/// <c>GetDataURI</c>.
/// </summary>
private class DataUriModel {
    /// <summary>Text captured by the "media_type" group.</summary>
    public string MediaType { get; set; }

    /// <summary>Text captured by the "type" group.</summary>
    public string Type { get; set; }

    /// <summary>
    /// The "tree" capture with its last character removed, split on '.';
    /// <c>null</c> when nothing usable was captured.
    /// </summary>
    public string[] Tree { get; set; }

    /// <summary>Text captured by the "subtype" group.</summary>
    public string Subtype { get; set; }

    /// <summary>
    /// The "suffix" capture without its first character; <c>null</c> when
    /// nothing usable was captured.
    /// </summary>
    public string Suffix { get; set; }

    /// <summary>
    /// The "params" capture without its first character, split on ';';
    /// <c>null</c> when nothing usable was captured.
    /// </summary>
    public string[] Params { get; set; }

    /// <summary>
    /// The "encoding" capture without its first character; <c>null</c> when
    /// nothing usable was captured.
    /// </summary>
    public string Encoding { get; set; }

    /// <summary>
    /// The "data" capture without its first character; <c>null</c> when
    /// nothing usable was captured.
    /// </summary>
    public string Data { get; set; }
}
/// <summary>
/// Demo entry point: parses a sample data URI with <c>GetDataURI</c> and
/// writes its Type, Subtype and Encoding to the console.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args) {
    string s = "data:image/prs.jpeg+gzip;charset=UTF-8;page=21;page=22;base64,/9j/4AAQSkZJRgABAQAAAQABAAD";
    var parsedUri = GetDataURI(s);

    // Read from the variable that actually holds the result; the previous
    // code referenced an undeclared identifier and did not compile.
    Console.WriteLine(parsedUri.Type);
    Console.WriteLine(parsedUri.Subtype);
    Console.WriteLine(parsedUri.Encoding);
}
/// <summary>
/// Pattern for a data URI. The named groups are media_type (which encloses
/// type, tree, subtype, suffix and params), encoding and data. Built once
/// because constructing a <see cref="RegexOptions.Compiled"/> regex on every
/// call is expensive.
/// </summary>
private static readonly Regex DataUriPattern = new Regex(@"^\s*data:(?<media_type>(?<type>[a-z\-]+){1}\/(?<tree>([a-z\-]+\.)+)?(?<subtype>[a-z\-]+){1}(?<suffix>\+[a-z]+)?(?<params>(;[a-z\-]+\=[a-z0-9\-\+]+)*)?)?(?<encoding>;base64)?(?<data>,+[a-z0-9\\\!\$\&\'\,\(\)\*\+\,\;\=\-\.\~\:\@\/\?\%\s]*\s*)?$", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Multiline);

/// <summary>
/// Splits a data URI into its components.
/// </summary>
/// <param name="data">The data URI text to parse.</param>
/// <returns>
/// A <c>DataUriModel</c> populated from the named groups of the match. When
/// the input does not match the pattern, a model with all properties left
/// at their defaults is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
private static DataUriModel GetDataURI(string data) {
    if (data == null) {
        throw new ArgumentNullException(nameof(data));
    }

    var result = new DataUriModel();
    var match = DataUriPattern.Match(data);
    if (!match.Success) {
        return result;
    }

    // Groups that did not participate in the match yield an empty Value.
    result.MediaType = match.Groups["media_type"].Value;
    result.Type = match.Groups["type"].Value;
    result.Subtype = match.Groups["subtype"].Value;

    // The tree capture ends with a '.', e.g. "prs." -> ["prs"].
    string tree = match.Groups["tree"].Value;
    result.Tree = HasDataUriPayload(tree) ? tree[0..^1].Split('.') : null;

    // Each remaining capture starts with its delimiter ('+', ';' or ','),
    // which is dropped before the value is stored.
    result.Suffix = StripDataUriDelimiter(match.Groups["suffix"].Value);
    result.Params = StripDataUriDelimiter(match.Groups["params"].Value)?.Split(';');
    result.Encoding = StripDataUriDelimiter(match.Groups["encoding"].Value);
    result.Data = StripDataUriDelimiter(match.Groups["data"].Value);

    return result;
}

/// <summary>
/// Tells whether a capture holds more than a lone delimiter character.
/// </summary>
private static bool HasDataUriPayload(string value) {
    return !string.IsNullOrWhiteSpace(value) && value.Length > 1;
}

/// <summary>
/// Removes the leading delimiter character from a capture, or returns
/// <c>null</c> when the capture has no payload after it.
/// </summary>
private static string StripDataUriDelimiter(string value) {
    return HasDataUriPayload(value) ? value[1..] : null;
}