What\'s the C++ way of parsing a string (given as char *) into an int? Robust and clear error handling is a plus (instead of returning zero).
I know this is an older question, but I've come across it so many times and, to date, have still not found a nicely templated solution having the following characteristics:
So, here is mine, with a test strap. Because it uses the C functions strtoull/strtoll under the hood, it always converts first to the largest type available. Then, if you are not using the largest type, it will perform additional range checks to verify your type was not over(under)flowed. For this, it is a little less performant than if one properly chose strtol/strtoul. However, it also works for shorts/chars and, to the best of my knowledge, there exists no standard library function that does that, too.
Enjoy; hopefully someone finds it useful.
#include
#include
#include
#include
#include
static const int DefaultBase = 10;
template
static inline T CstrtoxllWrapper(const char *str, int base = DefaultBase)
{
while (isspace(*str)) str++; // remove leading spaces; verify there's data
if (*str == '\0') { throw std::invalid_argument("str; no data"); } // nothing to convert
// NOTE: for some reason strtoull allows a negative sign, we don't; if
// converting to an unsigned then it must always be positive!
if (!std::numeric_limits::is_signed && *str == '-')
{ throw std::invalid_argument("str; negative"); }
// reset errno and call fn (either strtoll or strtoull)
errno = 0;
char *ePtr;
T tmp = std::numeric_limits::is_signed ? strtoll(str, &ePtr, base)
: strtoull(str, &ePtr, base);
// check for any C errors -- note these are range errors on T, which may
// still be out of the range of the actual type we're using; the caller
// may need to perform additional range checks.
if (errno != 0)
{
if (errno == ERANGE) { throw std::range_error("str; out of range"); }
else if (errno == EINVAL) { throw std::invalid_argument("str; EINVAL"); }
else { throw std::invalid_argument("str; unknown errno"); }
}
// verify everything converted -- extraneous spaces are allowed
if (ePtr != NULL)
{
while (isspace(*ePtr)) ePtr++;
if (*ePtr != '\0') { throw std::invalid_argument("str; bad data"); }
}
return tmp;
}
template
T StringToSigned(const char *str, int base = DefaultBase)
{
static const long long max = std::numeric_limits::max();
static const long long min = std::numeric_limits::min();
long long tmp = CstrtoxllWrapper(str, base); // use largest type
// final range check -- only needed if not long long type; a smart compiler
// should optimize this whole thing out
if (sizeof(T) == sizeof(tmp)) { return tmp; }
if (tmp < min || tmp > max)
{
std::ostringstream err;
err << "str; value " << tmp << " out of " << sizeof(T) * 8
<< "-bit signed range (";
if (sizeof(T) != 1) err << min << ".." << max;
else err << (int) min << ".." << (int) max; // don't print garbage chars
err << ")";
throw std::range_error(err.str());
}
return tmp;
}
template
T StringToUnsigned(const char *str, int base = DefaultBase)
{
static const unsigned long long max = std::numeric_limits::max();
unsigned long long tmp = CstrtoxllWrapper(str, base); // use largest type
// final range check -- only needed if not long long type; a smart compiler
// should optimize this whole thing out
if (sizeof(T) == sizeof(tmp)) { return tmp; }
if (tmp > max)
{
std::ostringstream err;
err << "str; value " << tmp << " out of " << sizeof(T) * 8
<< "-bit unsigned range (0..";
if (sizeof(T) != 1) err << max;
else err << (int) max; // don't print garbage chars
err << ")";
throw std::range_error(err.str());
}
return tmp;
}
template
inline T
StringToDecimal(const char *str, int base = DefaultBase)
{
return std::numeric_limits::is_signed ? StringToSigned(str, base)
: StringToUnsigned(str, base);
}
template
inline T
StringToDecimal(T &out_convertedVal, const char *str, int base = DefaultBase)
{
return out_convertedVal = StringToDecimal(str, base);
}
/*============================== [ Test Strap ] ==============================*/
#include
#include
static bool _g_anyFailed = false;
template
void TestIt(const char *tName,
const char *s, int base,
bool successExpected = false, T expectedValue = 0)
{
#define FAIL(s) { _g_anyFailed = true; std::cout << s; }
T x;
std::cout << "converting<" << tName << ">b:" << base << " [" << s << "]";
try
{
StringToDecimal(x, s, base);
// get here on success only
if (!successExpected)
{
FAIL(" -- TEST FAILED; SUCCESS NOT EXPECTED!" << std::endl);
}
else
{
std::cout << " -> ";
if (sizeof(T) != 1) std::cout << x;
else std::cout << (int) x; // don't print garbage chars
if (x != expectedValue)
{
FAIL("; FAILED (expected value:" << expectedValue << ")!");
}
std::cout << std::endl;
}
}
catch (std::exception &e)
{
if (successExpected)
{
FAIL( " -- TEST FAILED; EXPECTED SUCCESS!"
<< " (got:" << e.what() << ")" << std::endl);
}
else
{
std::cout << "; expected exception encounterd: [" << e.what() << "]" << std::endl;
}
}
}
#define TEST(t, s, ...) \
TestIt(#t, s, __VA_ARGS__);
int main()
{
std::cout << "============ variable base tests ============" << std::endl;
TEST(int, "-0xF", 0, true, -0xF);
TEST(int, "+0xF", 0, true, 0xF);
TEST(int, "0xF", 0, true, 0xF);
TEST(int, "-010", 0, true, -010);
TEST(int, "+010", 0, true, 010);
TEST(int, "010", 0, true, 010);
TEST(int, "-10", 0, true, -10);
TEST(int, "+10", 0, true, 10);
TEST(int, "10", 0, true, 10);
std::cout << "============ base-10 tests ============" << std::endl;
TEST(int, "-010", 10, true, -10);
TEST(int, "+010", 10, true, 10);
TEST(int, "010", 10, true, 10);
TEST(int, "-10", 10, true, -10);
TEST(int, "+10", 10, true, 10);
TEST(int, "10", 10, true, 10);
TEST(int, "00010", 10, true, 10);
std::cout << "============ base-8 tests ============" << std::endl;
TEST(int, "777", 8, true, 0777);
TEST(int, "-0111 ", 8, true, -0111);
TEST(int, "+0010 ", 8, true, 010);
std::cout << "============ base-16 tests ============" << std::endl;
TEST(int, "DEAD", 16, true, 0xDEAD);
TEST(int, "-BEEF", 16, true, -0xBEEF);
TEST(int, "+C30", 16, true, 0xC30);
std::cout << "============ base-2 tests ============" << std::endl;
TEST(int, "-10011001", 2, true, -153);
TEST(int, "10011001", 2, true, 153);
std::cout << "============ irregular base tests ============" << std::endl;
TEST(int, "Z", 36, true, 35);
TEST(int, "ZZTOP", 36, true, 60457993);
TEST(int, "G", 17, true, 16);
TEST(int, "H", 17);
std::cout << "============ space deliminated tests ============" << std::endl;
TEST(int, "1337 ", 10, true, 1337);
TEST(int, " FEAD", 16, true, 0xFEAD);
TEST(int, " 0711 ", 0, true, 0711);
std::cout << "============ bad data tests ============" << std::endl;
TEST(int, "FEAD", 10);
TEST(int, "1234 asdfklj", 10);
TEST(int, "-0xF", 10);
TEST(int, "+0xF", 10);
TEST(int, "0xF", 10);
TEST(int, "-F", 10);
TEST(int, "+F", 10);
TEST(int, "12.4", 10);
TEST(int, "ABG", 16);
TEST(int, "10011002", 2);
std::cout << "============ int8_t range tests ============" << std::endl;
TEST(int8_t, "7F", 16, true, std::numeric_limits::max());
TEST(int8_t, "80", 16);
TEST(int8_t, "-80", 16, true, std::numeric_limits::min());
TEST(int8_t, "-81", 16);
TEST(int8_t, "FF", 16);
TEST(int8_t, "100", 16);
std::cout << "============ uint8_t range tests ============" << std::endl;
TEST(uint8_t, "7F", 16, true, std::numeric_limits::max());
TEST(uint8_t, "80", 16, true, std::numeric_limits::max()+1);
TEST(uint8_t, "-80", 16);
TEST(uint8_t, "-81", 16);
TEST(uint8_t, "FF", 16, true, std::numeric_limits::max());
TEST(uint8_t, "100", 16);
std::cout << "============ int16_t range tests ============" << std::endl;
TEST(int16_t, "7FFF", 16, true, std::numeric_limits::max());
TEST(int16_t, "8000", 16);
TEST(int16_t, "-8000", 16, true, std::numeric_limits::min());
TEST(int16_t, "-8001", 16);
TEST(int16_t, "FFFF", 16);
TEST(int16_t, "10000", 16);
std::cout << "============ uint16_t range tests ============" << std::endl;
TEST(uint16_t, "7FFF", 16, true, std::numeric_limits::max());
TEST(uint16_t, "8000", 16, true, std::numeric_limits::max()+1);
TEST(uint16_t, "-8000", 16);
TEST(uint16_t, "-8001", 16);
TEST(uint16_t, "FFFF", 16, true, std::numeric_limits::max());
TEST(uint16_t, "10000", 16);
std::cout << "============ int32_t range tests ============" << std::endl;
TEST(int32_t, "7FFFFFFF", 16, true, std::numeric_limits::max());
TEST(int32_t, "80000000", 16);
TEST(int32_t, "-80000000", 16, true, std::numeric_limits::min());
TEST(int32_t, "-80000001", 16);
TEST(int32_t, "FFFFFFFF", 16);
TEST(int32_t, "100000000", 16);
std::cout << "============ uint32_t range tests ============" << std::endl;
TEST(uint32_t, "7FFFFFFF", 16, true, std::numeric_limits::max());
TEST(uint32_t, "80000000", 16, true, std::numeric_limits::max()+1);
TEST(uint32_t, "-80000000", 16);
TEST(uint32_t, "-80000001", 16);
TEST(uint32_t, "FFFFFFFF", 16, true, std::numeric_limits::max());
TEST(uint32_t, "100000000", 16);
std::cout << "============ int64_t range tests ============" << std::endl;
TEST(int64_t, "7FFFFFFFFFFFFFFF", 16, true, std::numeric_limits::max());
TEST(int64_t, "8000000000000000", 16);
TEST(int64_t, "-8000000000000000", 16, true, std::numeric_limits::min());
TEST(int64_t, "-8000000000000001", 16);
TEST(int64_t, "FFFFFFFFFFFFFFFF", 16);
TEST(int64_t, "10000000000000000", 16);
std::cout << "============ uint64_t range tests ============" << std::endl;
TEST(uint64_t, "7FFFFFFFFFFFFFFF", 16, true, std::numeric_limits::max());
TEST(uint64_t, "8000000000000000", 16, true, std::numeric_limits::max()+1);
TEST(uint64_t, "-8000000000000000", 16);
TEST(uint64_t, "-8000000000000001", 16);
TEST(uint64_t, "FFFFFFFFFFFFFFFF", 16, true, std::numeric_limits::max());
TEST(uint64_t, "10000000000000000", 16);
std::cout << std::endl << std::endl
<< (_g_anyFailed ? "!! SOME TESTS FAILED !!" : "ALL TESTS PASSED")
<< std::endl;
return _g_anyFailed;
}
StringToDecimal
is the user-land method; it is overloaded so it can be called either like this:
int a; a = StringToDecimal("100");
or this:
int a; StringToDecimal(a, "100");
I hate repeating the int type, so prefer the latter. This ensures that if the type of 'a' changes one does not get bad results. I wish the compiler could figure it out like:
int a; a = StringToDecimal("100");
...but, C++ does not deduce template return types, so that's the best I can get.
The implementation is pretty simple:
CstrtoxllWrapper
wraps both strtoull
and strtoll
, calling whichever is necessary based on the template type's signed-ness and providing some additional guarantees (e.g. negative input is disallowed if unsigned and it ensures the entire string was converted).
CstrtoxllWrapper
is used by StringToSigned
and StringToUnsigned
with the largest type (long long/unsigned long long) available to the compiler; this allows the maximal conversion to be performed. Then, if it is necessary, StringToSigned
/StringToUnsigned
performs the final range checks on the underlying type. Finally, the end-point method, StringToDecimal
, decides which of the StringTo* template methods to call based on the underlying type's signed-ness.
I think most of the junk can be optimized out by the compiler; just about everything should be compile-time deterministic. Any commentary on this aspect would be interesting to me!