The now-released Roslyn project provides Microsoft.CodeAnalysis.CSharp.SyntaxFacts, with SyntaxFacts.IsIdentifierStartCharacter(char) and SyntaxFacts.IsIdentifierPartCharacter(char) methods, just like Java's Character.isJavaIdentifierStart and Character.isJavaIdentifierPart.
Here it is in use, in a simple function I use to turn noun phrases (e.g. "Start Date") into C# identifiers (e.g. "StartDate"). N.B. I'm using Humanizer to do the camel-case conversion, and Roslyn to check whether each character is valid.
/// <summary>
/// Turns a noun phrase (e.g. "Start Date") into a C# identifier (e.g. "StartDate").
/// </summary>
/// <param name="name">
/// The phrase to convert. It is passed to <c>Check.IsNotNullOrWhitespace</c> first
/// (presumably rejecting null/blank input; confirm against that helper).
/// </param>
/// <returns>
/// The trimmed, de-humanized phrase with every character that Roslyn does not accept as an
/// identifier part removed. The result is prefixed with "_" when the de-humanized text is empty
/// or its first character cannot start an identifier, and with "@" when
/// <c>SyntaxFacts.GetKeywordKind</c> reports the result as a keyword.
/// </returns>
public static string Identifier(string name)
{
    Check.IsNotNullOrWhitespace(name, nameof(name));

    // Strip surrounding whitespace, then let Humanizer's Dehumanize join the words
    // ("Start Date" => "StartDate").
    name = name.Trim().Dehumanize();

    // +1 leaves room for the optional leading underscore.
    var sb = new StringBuilder(name.Length + 1);

    // Dehumanize may leave nothing behind (e.g. input consisting only of separator
    // characters), so check the length before indexing name[0]. In that case, and when the
    // first character is not a legal identifier start (a digit, punctuation, ...), lead
    // with an underscore so the result is still a valid identifier.
    if (name.Length == 0 || !SyntaxFacts.IsIdentifierStartCharacter(name[0]))
    {
        sb.Append('_');
    }

    // Keep only the characters that are legal inside an identifier; drop everything else.
    foreach (var ch in name)
    {
        if (SyntaxFacts.IsIdentifierPartCharacter(ch))
        {
            sb.Append(ch);
        }
    }

    var result = sb.ToString();

    // A keyword can only be used as an identifier when escaped with '@'.
    if (SyntaxFacts.GetKeywordKind(result) != SyntaxKind.None)
    {
        result = "@" + result;
    }

    return result;
}
Tests:
/// <summary>
/// Verifies that <c>CSharp.Identifier</c> maps each sample phrase to the expected identifier.
/// </summary>
[TestCase("Start Date", "StartDate")]
[TestCase("Bad*chars", "BadChars")]
[TestCase(" leading ws", "LeadingWs")]
[TestCase("trailing ws ", "TrailingWs")]
[TestCase("class", "Class")]
[TestCase("int", "Int")]
[Test]
public void CSharp_GeneratesDecentIdentifiers(string input, string expected)
{
    // Act
    var actual = CSharp.Identifier(input);

    // Assert
    Assert.AreEqual(expected, actual);
}