Does C# have built-in support for parsing strings of page numbers? By page numbers, I mean the format you might enter into a print dialog that's a mixture of comma- and dash-delimited values.
Below is the code I just put together to do this. You can enter input in a format like: 1-2,5abcd,6,7,20-15,,,,,,
It is easy to extend for other formats.
/// <summary>
/// Expands a comma-separated page specification into the page numbers it denotes.
/// </summary>
/// <param name="ranges">Comma-separated entries; each entry is passed to GetRangeNumbers.</param>
/// <returns>The numbers produced for every entry, concatenated in entry order.</returns>
private int[] ParseRange(string ranges)
{
    // Flatten the per-entry results into a single sequence.
    var pages =
        from entry in ranges.Split(',')
        from page in GetRangeNumbers(entry)
        select page;

    return pages.ToArray();
}
/// <summary>
/// Converts one entry of a page specification into the page numbers it denotes.
/// </summary>
/// <remarks>
/// The entry is split on '-'. Within each part every character other than the ASCII digits 0-9 is
/// discarded, and parts left empty are dropped. When exactly two numbers remain they are treated as an
/// inclusive range regardless of the order they were written in (so "20-15" gives 15 through 20).
/// Any other count of numbers is returned as parsed.
/// </remarks>
/// <param name="range">A single entry such as "6", "1-2" or "5abcd".</param>
/// <returns>The expanded numbers; an empty array when the entry contains no digits.</returns>
private int[] GetRangeNumbers(string range)
{
    int[] rangeNums = range
        .Split('-')
        // Keep ASCII digits only: Char.IsDigit also accepts other Unicode decimal digits,
        // which int.Parse rejects with a FormatException.
        .Select(part => new string(part.Where(c => c >= '0' && c <= '9').ToArray()))
        .Where(digits => digits.Length > 0) // Only if it has a value
        .Select(digits => int.Parse(digits))
        .ToArray();

    if (rangeNums.Length != 2)
    {
        return rangeNums;
    }

    // Two numbers form an inclusive range; accept the bounds in either order.
    int low = Math.Min(rangeNums[0], rangeNums[1]);
    int high = Math.Max(rangeNums[0], rangeNums[1]);
    return Enumerable.Range(low, (high + 1) - low).ToArray();
}
The answer I came up with:
/// <summary>
/// Expands a comma-separated page specification into the individual entries it denotes.
/// </summary>
/// <remarks>
/// Entries without a '-' are returned unchanged (no trimming or validation). An entry containing '-'
/// is read as an inclusive range between the text before the first '-' and the text after the last '-'.
/// The bounds may be given in either order; the pages are produced in ascending order.
/// Evaluation is lazy, so parse errors surface while the result is being enumerated.
/// </remarks>
/// <param name="str">The specification, for example "1,3,5-10,12".</param>
/// <returns>Each page number as a string.</returns>
/// <exception cref="FormatException">A range bound is empty or not a valid integer.</exception>
static IEnumerable<string> ParseRange(string str)
{
    foreach (var entry in str.Split(','))
    {
        int firstDash = entry.IndexOf('-');
        if (firstDash < 0)
        {
            // Not a range: pass the entry through as written.
            yield return entry;
            continue;
        }

        int startInt = Int32.Parse(entry.Substring(0, firstDash));
        int endInt = Int32.Parse(entry.Substring(entry.LastIndexOf('-') + 1));

        // Enumerable.Range throws on a negative count, so normalise "20-15" to 15..20.
        int low = Math.Min(startInt, endInt);
        int high = Math.Max(startInt, endInt);

        foreach (var page in Enumerable.Range(low, high - low + 1))
        {
            yield return page.ToString();
        }
    }
}
You can't be sure until you have test cases. In my case I would prefer the input to be whitespace-delimited instead of comma-delimited. That makes the parsing a little more complex.
/// <summary>
/// Checks RangeParser.Parse against a single number, a ".." range, a range with trailing whitespace,
/// a range followed by a further number, and a range with whitespace around the ".." token.
/// </summary>
[Fact]
public void ShouldBeAbleToParseRanges()
{
    // A lone number.
    var single = RangeParser.Parse( "1" );
    single.Should().BeEquivalentTo( 1 );

    // A range starting at a negative number.
    var range = RangeParser.Parse( "-1..2" );
    range.Should().BeEquivalentTo( -1,0,1,2 );

    // Trailing whitespace after the range.
    var rangeWithTrailingSpace = RangeParser.Parse( "-1..2 " );
    rangeWithTrailingSpace.Should().BeEquivalentTo( -1,0,1,2 );

    // A range followed by a separate number.
    var rangeThenNumber = RangeParser.Parse( "-1..2 5" );
    rangeThenNumber.Should().BeEquivalentTo( -1,0,1,2,5 );

    // Whitespace on both sides of the ".." token.
    var spacedRangeThenNumber = RangeParser.Parse( " -1 .. 2 5" );
    spacedRangeThenNumber.Should().BeEquivalentTo( -1,0,1,2,5 );
}
Note that Keith's answer (or a small variation of it) will fail the last test, where there is whitespace around the range token. Handling that requires a tokenizer and a proper parser with lookahead.
namespace Utils
{
    /// <summary>
    /// Parses text made of integers and ".." ranges (for example "-1..2 5") into the integers it denotes.
    /// </summary>
    public class RangeParser
    {
        // Token grammar: an optionally negative integer, or the ".." range operator.
        // "[0-9]+" (rather than "[1-9]+[0-9]*") so that a literal 0 is recognised as a number.
        // Built once because the pattern never changes between calls.
        private static readonly Regex TokenPattern = new Regex(
            @"(?<number>-?[0-9]+)|" +
            @"(?<range>\.\.)" );

        /// <summary>
        /// A lexical token: <see cref="Name"/> is "number" or "range", <see cref="Value"/> is the matched text.
        /// </summary>
        public class RangeToken
        {
            public string Name;
            public string Value;
        }

        /// <summary>
        /// Splits the input into number and range tokens, in the order they appear.
        /// </summary>
        /// <remarks>Text that matches neither token pattern (such as whitespace) is skipped.</remarks>
        /// <param name="v">The text to tokenize.</param>
        /// <returns>A lazily evaluated sequence of tokens.</returns>
        public static IEnumerable<RangeToken> Tokenize(string v)
        {
            foreach (Match match in TokenPattern.Matches( v ))
            {
                var numberGroup = match.Groups["number"];
                if (numberGroup.Success)
                {
                    yield return new RangeToken {Name = "number", Value = numberGroup.Value};
                    continue;
                }

                var rangeGroup = match.Groups["range"];
                if (rangeGroup.Success)
                {
                    yield return new RangeToken {Name = "range", Value = rangeGroup.Value};
                }
            }
        }

        /// <summary>
        /// Parser states: <c>Start</c> = no pending number, <c>Unknown</c> = a number has been read and may
        /// turn out to be a range start, <c>InRange</c> = a ".." has been read and an end number is expected.
        /// </summary>
        public enum State { Start, Unknown, InRange}

        /// <summary>
        /// Expands the input into the integers it denotes; "a..b" yields every integer from a to b inclusive.
        /// </summary>
        /// <remarks>
        /// Evaluation is lazy, so errors surface while the result is being enumerated.
        /// A ".." at the very end of the input is not reported: the number before it is dropped.
        /// </remarks>
        /// <param name="v">The text to parse, for example "-1..2 5".</param>
        /// <returns>The integers in input order.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A ".." is not directly preceded by a number, or a range end is smaller than its start.
        /// </exception>
        public static IEnumerable<int> Parse(string v)
        {
            var state = State.Start;
            var number = 0; // the pending number while state is Unknown or InRange

            foreach (var token in Tokenize( v ))
            {
                switch (token.Name)
                {
                    case "number":
                        var nextNumber = int.Parse( token.Value );
                        switch (state)
                        {
                            case State.Start:
                                // Hold the number back: the next token decides whether it starts a range.
                                number = nextNumber;
                                state = State.Unknown;
                                break;
                            case State.Unknown:
                                // Two numbers in a row: the earlier one was a plain value.
                                yield return number;
                                number = nextNumber;
                                break;
                            case State.InRange:
                                // Enumerable.Range throws ArgumentOutOfRangeException if the length is negative.
                                int rangeLength = nextNumber - number + 1;
                                foreach (int i in Enumerable.Range( number, rangeLength ))
                                {
                                    yield return i;
                                }
                                state = State.Start;
                                break;
                            default:
                                throw new ArgumentOutOfRangeException();
                        }
                        break;

                    case "range":
                        switch (state)
                        {
                            case State.Unknown:
                                state = State.InRange;
                                break;
                            case State.Start:
                            case State.InRange:
                                // ".." must come directly after a pending number.
                                throw new ArgumentOutOfRangeException();
                            default:
                                throw new ArgumentOutOfRangeException();
                        }
                        break;

                    default:
                        throw new ArgumentOutOfRangeException( nameof( token ) );
                }
            }

            // Flush a trailing plain number. In Start nothing is pending; in InRange the input ended
            // right after "..", and the pending number is dropped.
            switch (state)
            {
                case State.Start:
                case State.InRange:
                    break;
                case State.Unknown:
                    yield return number;
                    break;
                default:
                    throw new ArgumentOutOfRangeException();
            }
        }
    }
}
Should be simple:
// Walk each comma-separated entry of the page specification.
foreach (string entry in "1,3,5-10,12".Split(','))
{
    // A plain number is emitted as-is.
    int single;
    if (int.TryParse(entry, out single))
    {
        yield return single;
        continue;
    }

    // Otherwise the entry may be a range "start-end".
    string[] bounds = entry.Split('-');
    if (bounds.Length <= 1)
    {
        continue; // no range delimiter: ignore the entry
    }

    // Ignore the entry unless both bounds parse and are in ascending order.
    int first, last;
    if (!int.TryParse(bounds[0], out first) ||
        !int.TryParse(bounds[1], out last) ||
        last < first)
    {
        continue;
    }

    // Emit every value from first to last inclusive.
    int count = last - first + 1;
    foreach (int page in Enumerable.Range(first, count))
    {
        yield return page;
    }
}
Edit: thanks for the fix ;-)
It doesn't have a built-in way to do this, but it would be trivial to do using String.Split.
Simply split on ',' then you have a series of strings that represent either page numbers or ranges. Iterate over that series and do a String.Split of '-'. If there isn't a result, it's a plain page number, so stick it in your list of pages. If there is a result, take the left and right of the '-' as the bounds and use a simple for loop to add each page number to your final list over that range.
Can't take but 5 minutes to do, then maybe another 10 to add in some sanity checks to throw errors when the user tries to input invalid data (like "1-2-3" or something.)