I'm trying to parse a file that has the following format:
BEGIN:VEVENT
CREATED:20120504T163940Z
DTEND;TZID=America/Chicago:20120504T130000
DTSTAMP:20120504T1640
edit
This post got me thinking about the iCal format.
Before yesterday, I didn't know what the iCal format was. But, after reading the 1998 spec, it's painfully obvious that none of the answers on this page are adequate to parse the content. And, it's really too sophisticated even for my general regex below.
With that in mind, here is a solution that parses just the line content, as gleaned from the spec for general line content parsing. It's a step in the right direction, and hopefully someone can benefit. It doesn't do line continuation and does not validate.
C# code
// Matches one (already unfolded) iCalendar content line of the form
//     name *( ";" param_name "=" param_value *( "," param_value ) ) ":" value
// and exposes the pieces through the named groups "name", "param_name",
// "param_value" and "value".
//
// "param_name" and "param_value" are captured once per parameter, so their
// Captures collections line up index by index. Each "param_value" capture holds
// the whole comma-separated list for that parameter; quoted values keep their
// surrounding double quotes.
//
// Control characters are excluded with \p{Cc} because .NET does not implement
// POSIX bracket classes such as [:cntrl:]. The [^\S\n] alternative re-admits
// whitespace other than a newline.
//
// The pattern relies on RegexOptions.IgnorePatternWhitespace: the spaces,
// indentation and line breaks inside it are layout only.
Regex iCalMainRx = new Regex(
@" ^ (?<name> [^\p{Cc}"";:,\n]+ )
     (?:
        ;
        (?<param_name> [^\p{Cc}"";:,\n]+ )
        =
        (?<param_value>
           (?: (?:[^\S\n]|[^\p{Cc}"";:,])* | "" (?:[^\S\n]|[^\p{Cc}""])* "" )
           (?: , (?: (?:[^\S\n]|[^\p{Cc}"";:,])* | "" (?:[^\S\n]|[^\p{Cc}""])* "" ) )*
        )
     )*
     :
     (?<value> (?:[^\S\n]|[^\p{Cc}])* )
   $ ", RegexOptions.IgnorePatternWhitespace);
// Splits one parameter value list into its individual values.
// Every value, quoted or not, is captured into the repeated group "pvals";
// read them through Groups["pvals"].Captures. Quoted values keep their
// surrounding double quotes. Runs of consecutive commas are consumed by ",+"
// as a single separator, so only the leading value can be captured as an empty
// string ahead of a comma run.
//
// Control characters are excluded with \p{Cc} because .NET does not implement
// POSIX bracket classes such as [:cntrl:]. The [^\S\n] alternative re-admits
// whitespace other than a newline.
//
// The pattern relies on RegexOptions.IgnorePatternWhitespace: the spaces,
// indentation and line breaks inside it are layout only.
Regex iCalPvalRx = new Regex(
@" ^ (?<pvals> (?:[^\S\n]|[^\p{Cc}"";:,])* | "" (?:[^\S\n]|[^\p{Cc}""])* "" )
     (?: ,+ (?<pvals> (?:[^\S\n]|[^\p{Cc}"";:,])* | "" (?:[^\S\n]|[^\p{Cc}""])* "" ) )*
   $ ", RegexOptions.IgnorePatternWhitespace);
// Sample VEVENT content lines fed to the parser below, one array element per
// content line.
string[] lines = new string[]
{
    // Component start.
    "BEGIN:VEVENT",

    // Property lines; DTEND and DTSTART carry parameters.
    "CREATED:20120504T163940Z",
    "DTEND;TZID=America/Chicago:20120504T130000",
    "DTSTAMP:20120504T164000Z",
    // Several parameters at once: empty list entries, empty values and a
    // quoted value containing ';', ':' and '='.
    "DTSTART;TZID=,,,America/Chicago;Next=;last=\"this:;;;:=\";final=:20120504T120000",
    "LAST-MODIFIED:20120504T163940Z",
    "SEQUENCE:0",
    "SUMMARY:Test 1",
    "TRANSP:OPAQUE",
    "UID:21F61281-FB76-467F-A2CC-A666688BD9B5",
    "X-RADICALE-NAME:21F61281-FB76-467F-A2CC-A666688BD9B5.ics",

    // Component end.
    "END:VEVENT",
};
// Run every sample line through the content-line regex and print its name,
// its value and, when present, each parameter with its individual values.
foreach (string line in lines)
{
    Match content = iCalMainRx.Match(line);
    if (!content.Success)
    {
        // Lines that do not match are skipped without any output.
        continue;
    }

    Console.WriteLine("Key = {0}", content.Groups["name"].Value);
    Console.WriteLine("Value = {0}", content.Groups["value"].Value);

    // The name and value captures are indexed in parallel:
    // entry i of one belongs to entry i of the other.
    CaptureCollection paramNames = content.Groups["param_name"].Captures;
    CaptureCollection paramValues = content.Groups["param_value"].Captures;

    if (paramNames.Count > 0)
    {
        Console.WriteLine("Parameters: ");
    }

    for (int p = 0; p < paramNames.Count; p++)
    {
        Console.WriteLine("\t'{0}' =", paramNames[p].Value);

        // Break the raw value list of this parameter into single values.
        Match valueList = iCalPvalRx.Match(paramValues[p].Value);
        if (!valueList.Success)
        {
            continue;
        }

        foreach (Capture single in valueList.Groups["pvals"].Captures)
        {
            Console.WriteLine("\t\t'{0}'", single.Value);
        }
    }

    Console.WriteLine("-------------------------");
}
Output
Key = BEGIN
Value = VEVENT
-------------------------
Key = CREATED
Value = 20120504T163940Z
-------------------------
Key = DTEND
Value = 20120504T130000
Parameters:
'TZID' =
'America/Chicago'
-------------------------
Key = DTSTAMP
Value = 20120504T164000Z
-------------------------
Key = DTSTART
Value = 20120504T120000
Parameters:
'TZID' =
''
'America/Chicago'
'Next' =
''
'last' =
'"this:;;;:="'
'final' =
''
-------------------------
Key = LAST-MODIFIED
Value = 20120504T163940Z
-------------------------
Key = SEQUENCE
Value = 0
-------------------------
Key = SUMMARY
Value = Test 1
-------------------------
Key = TRANSP
Value = OPAQUE
-------------------------
Key = UID
Value = 21F61281-FB76-467F-A2CC-A666688BD9B5
-------------------------
Key = X-RADICALE-NAME
Value = 21F61281-FB76-467F-A2CC-A666688BD9B5.ics
-------------------------
Key = END
Value = VEVENT
-------------------------