I\'m sorting strings that are comprised of text and numbers. I want the sort to sort the number parts as numbers, not alphanumeric.
For example I want: abc1def, ...
Several natural sort implementations for C++ are available. A brief review:
Avalanchesort is a recursive variation of naturall sort, whiche merge runs, while exploring the stack of sorting-datas. The algorithim will sort stable, even if you add datas to your sorting-heap, while the algorithm is running/sorting.
The search-principle is simple. Only merge runs with the same rank.
After finding the first two naturell runs (rank 0), avalanchesort merge them to a run with rank 1. Then it call avalanchesort, to generate a second run with rank 1 and merge the two runs to a run with rank 2. Then it call the avalancheSort to generate a run with rank 2 on the unsorted datas....
My Implementation porthd/avalanchesort divide the sorting from the handling of the data using interface injection. You can use the algorithmn for datastructures like array, associative arrays or lists.
/**
* @param DataListAvalancheSortInterface $dataList
* @param DataRangeInterface $beginRange
* @param int $avalancheIndex
* @return bool
*/
public function startAvalancheSort(DataListAvalancheSortInterface $dataList)
{
$avalancheIndex = 0;
$rangeResult = $this->avalancheSort($dataList, $dataList->getFirstIdent(), $avalancheIndex);
if (!$dataList->isLastIdent($rangeResult->getStop())) {
do {
$avalancheIndex++;
$lastIdent = $rangeResult->getStop();
if ($dataList->isLastIdent($lastIdent)) {
$rangeResult = new $this->rangeClass();
$rangeResult->setStart($dataList->getFirstIdent());
$rangeResult->setStop($dataList->getLastIdent());
break;
}
$nextIdent = $dataList->getNextIdent($lastIdent);
$rangeFollow = $this->avalancheSort($dataList, $nextIdent, $avalancheIndex);
$rangeResult = $this->mergeAvalanche($dataList, $rangeResult, $rangeFollow);
} while (true);
}
return $rangeResult;
}
/**
* @param DataListAvalancheSortInterface $dataList
* @param DataRangeInterface $range
* @return DataRangeInterface
*/
protected function findRun(DataListAvalancheSortInterface $dataList,
$startIdent)
{
$result = new $this->rangeClass();
$result->setStart($startIdent);
$result->setStop($startIdent);
do {
if ($dataList->isLastIdent($result->getStop())) {
break;
}
$nextIdent = $dataList->getNextIdent($result->getStop());
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($result->getStop()),
$dataList->getDataItem($nextIdent)
)) {
$result->setStop($nextIdent);
} else {
break;
}
} while (true);
return $result;
}
/**
* @param DataListAvalancheSortInterface $dataList
* @param $beginIdent
* @param int $avalancheIndex
* @return DataRangeInterface|mixed
*/
protected function avalancheSort(DataListAvalancheSortInterface $dataList,
$beginIdent,
int $avalancheIndex = 0)
{
if ($avalancheIndex === 0) {
$rangeFirst = $this->findRun($dataList, $beginIdent);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
// it is the last run
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->findRun($dataList, $nextIdent);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
} else {
$rangeFirst = $this->avalancheSort($dataList,
$beginIdent,
($avalancheIndex - 1)
);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->avalancheSort($dataList,
$nextIdent,
($avalancheIndex - 1)
);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
}
return $rangeResult;
}
protected function mergeAvalanche(DataListAvalancheSortInterface $dataList, $oddListRange, $evenListRange)
{
$resultRange = new $this->rangeClass();
$oddNextIdent = $oddListRange->getStart();
$oddStopIdent = $oddListRange->getStop();
$evenNextIdent = $evenListRange->getStart();
$evenStopIdent = $evenListRange->getStop();
$dataList->initNewListPart($oddListRange, $evenListRange);
do {
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($oddNextIdent),
$dataList->getDataItem($evenNextIdent)
)) {
$dataList->addListPart($oddNextIdent);
if ($oddNextIdent === $oddStopIdent) {
$restTail = $evenNextIdent;
$stopTail = $evenStopIdent;
break;
}
$oddNextIdent = $dataList->getNextIdent($oddNextIdent);
} else {
$dataList->addListPart($evenNextIdent);
if ($evenNextIdent === $evenStopIdent) {
$restTail = $oddNextIdent;
$stopTail = $oddStopIdent;
break;
}
$evenNextIdent = $dataList->getNextIdent($evenNextIdent);
}
} while (true);
while ($stopTail !== $restTail) {
$dataList->addListPart($restTail);
$restTail = $dataList->getNextIdent($restTail);
}
$dataList->addListPart($restTail);
$dataList->cascadeDataListChange($resultRange);
return $resultRange;
}
}
Partially reposting my another answer:
bool compareNat(const std::string& a, const std::string& b){
if (a.empty())
return true;
if (b.empty())
return false;
if (std::isdigit(a[0]) && !std::isdigit(b[0]))
return true;
if (!std::isdigit(a[0]) && std::isdigit(b[0]))
return false;
if (!std::isdigit(a[0]) && !std::isdigit(b[0]))
{
if (a[0] == b[0])
return compareNat(a.substr(1), b.substr(1));
return (toUpper(a) < toUpper(b));
//toUpper() is a function to convert a std::string to uppercase.
}
// Both strings begin with digit --> parse both numbers
std::istringstream issa(a);
std::istringstream issb(b);
int ia, ib;
issa >> ia;
issb >> ib;
if (ia != ib)
return ia < ib;
// Numbers are the same --> remove numbers and recurse
std::string anew, bnew;
std::getline(issa, anew);
std::getline(issb, bnew);
return (compareNat(anew, bnew));
}
toUpper()
function:
std::string toUpper(std::string s){
for(int i=0;i<(int)s.length();i++){s[i]=toupper(s[i]);}
return s;
}
Usage:
std::vector<std::string> str;
str.push_back("abc1def");
str.push_back("abc10def");
...
std::sort(str.begin(), str.end(), compareNat);
I asked this exact question (although in Java) and got pointed to http://www.davekoelle.com/alphanum.html which has an algorithm and implementations of it in many languages.
For those that arrive here and are already using Qt in their project, you can use the QCollator
class. See this question for details.
This is known as natural sorting. There's an algorithm here that looks promising.
Be careful of problems with non-ASCII characters (see Jeff's blog entry on the subject).