问题
We have map of boost path to string pairs like name:location (absolute location paths a la usr/myfolder/
). We are given with some location a la usr/myfolder/mysubfolder/myfile
. How to find which of maps location fit to given url most?
Example we have a map like which we can resort if we need:
service1:myfolder/
service2:myfolder/mysubfolder/
service3:myfolder/myothersubfolder/
service4:myfolder/mysubfolder/myfile
We are given value myfolder/mysubfolder/myfile/blablabla/
(path).
We want to find out to which item in our map it relates the most.
Search result shall be service4
as map item with most related content.
So how to find by given string value to which map element it relates the most?
So original question was about general string case but I had some reconfiguration so no I just work on boost paths.
回答1:
I don't really have a ready C++ answer, but I had to do something similar in C# recently, and came up with the following:
Loop through the whole vector, checking the interesting path to see if it begins with an element. The longest such match is the winner. This would be an O(n) operation, depending upon the number of paths in the comparison set.
My refined version of the above became a little different, because I was going to be checking against a number of entries I'd already checked before.
So, I sorted the vector by descending length of path, so that the first match I come across would also be the best (giving me an average O(n/2) operation, I think), and stored results into a dictionary, so I wouldn't need to brute force the search again.
Hope this helps!
回答2:
You could use the Levenshtein distance
EDIT
Since I finally needed something similar myself, and this question remains open. Here is some code I played around with. Both straight up string distance and also applying the Levenshtein algorithm to the path tokens.
C++ Code
/*
----- String based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
2 : this/is/a/strong/path
2 : that/is/a/strange/path
4 : is/this/a/strange/path
5 : this/is/a/strange
13 : this/is/a
15 : this/is
16 : that/was
18 : this
24 : completely/different/folder
----- Path based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
1 : this/is/a/strange
2 : this/is/a/strong/path
2 : that/is/a/strange/path
2 : this/is/a
2 : is/this/a/strange/path
3 : this/is
4 : this
7 : that/was
8 : completely/different/folder
*/
#include <string>
#include <vector>
#include <algorithm>
#include <stdio.h>
/// returns the smaller of two parameters
template< typename T >
T levmin( T v1, T v2 )
{ return ( v1 < v2 ) ? v1 : v2; }
/// Returns the Levenshtein distance between the specified strings
template < typename T, typename T_STR >
typename T_STR::size_type levstr(const T_STR &s1, const T_STR &s2)
{
typename T_STR::size_type l1 = s1.length(), l2 = s2.length();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + ( s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1 )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Returns the Levenshtein distance between the specified split paths
template < typename T, typename T_STR, typename T_LST >
typename T_STR::size_type levpath(const T_LST &lst1, const T_LST &lst2)
{
typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size();
std::vector< typename T_STR::size_type > d( ( l1 + 1 ) * ( l2 + 1 ) );
typename T_STR::size_type i, j;
for ( i = 0; i <= l1; i++ )
d[ i * l2 ] = i;
for ( i = 0; i <= l2; i++ )
d[ i ] = i;
for ( i = 1; i <= l1; i++ )
for ( j = 1; j <= l2; j++ )
d[ i * l2 + j ] = levmin( levmin( d[ ( i - 1 ) * l2 + j ] + 1, d[ i * l2 + ( j - 1 ) ] + 1 ),
d[ ( i - 1 ) * l2 + ( j - 1 ) ] + levstr< T, T_STR>( lst1[ i - 1 ], lst2[ j - 1 ] )
);
return d[ ( l1 * l2 ) + l2 ];
}
/// Generic split path function
/*
Returns a vector of path tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_LST splitpath( const T_STR &s, const T sep )
{ T_LST lst;
typename T_STR::size_type i = 0, l = 0;
while( T_STR::npos != ( i = s.find_first_of( sep, i ) ) )
{ if ( l < i )
lst.push_back( T_STR( s, l, i - l ) );
l = ++i;
} // end while
if ( l < s.length() )
lst.push_back( T_STR( s, l, s.length() - l ) );
return lst;
}
/// Generic join path function
/*
Returns a string of joined vector tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_STR joinpath( const T_LST &p, const T sep )
{ T_STR s;
for ( typename T_LST::const_iterator it = p.begin(); it != p.end(); it++ )
{ if ( s.length() ) s += sep; s += *it; }
return s;
}
// String types
typedef char t_levchar;
typedef std::basic_string< t_levchar > t_levstr;
typedef std::vector< t_levstr > t_levpath;
typedef std::vector< t_levpath > t_levpathlist;
// Sort compare for split paths
template< typename T, typename T_STR, typename T_LST > struct levcmp
{ levcmp( const T_LST &p ) { m_p = p; }
bool operator() ( const T_LST &i, const T_LST &j )
{ return levpath< T, T_STR, T_LST >( i, m_p ) < levpath< T, T_STR, T_LST >( j, m_p ); }
T_LST m_p;
};
// Sort compare for strings
template< typename T, typename T_STR > struct levcmp_str
{ levcmp_str( const T_STR &s ) { m_s = s; }
bool operator() ( const T_STR &i, const T_STR &j )
{ return levstr< T, T_STR >( i, m_s ) < levstr< T, T_STR >( j, m_s ); }
T_STR m_s;
};
int main( int argc, char* argv[] )
{
// Path to compare with
const t_levchar* compare_path = "this/is/a/strange/path";
// Paths to sort
const t_levchar* path_list[] =
{
"this/is/a/strong/path",
"that/is/a/strange/path",
"this/is/a/strange",
"this/is",
"this/is/a",
"this",
"this/is/a/strange/path",
"is/this/a/strange/path",
"that/was",
"completely/different/folder",
0
};
printf( "\n----- String based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from paths
std::vector< t_levstr > paths;
for( int i = 0; path_list[ i ]; i++ )
paths.push_back( path_list[ i ] );
// Sort the paths
std::sort( paths.begin(), paths.end(), levcmp_str< t_levchar, t_levstr >( compare_path ) );
// Show the result
for ( std::vector< t_levstr >::iterator it = paths.begin(); it != paths.end(); it++ )
printf( "%d : %s\n",
(int)levstr< t_levchar, t_levstr >( *it, compare_path ),
(const char*)it->c_str() );
printf( "\n----- Path based Levenshtein ----\n"
"Matching : %s\n\n", compare_path );
// Create vector from split paths
t_levpath splitcompare = splitpath< t_levchar, t_levstr, t_levpath >( compare_path, '/' );
t_levpathlist splitpaths;
for ( int i = 0; path_list[ i ]; i++ )
splitpaths.push_back( splitpath< t_levchar, t_levstr, t_levpath >( path_list[ i ], '/' ) );
// Sort the paths
std::sort( splitpaths.begin(), splitpaths.end(),
levcmp< t_levchar, t_levstr, t_levpath >( splitcompare ) );
// Show the results
for ( t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++ )
printf( "%d : %s\n",
(int)levpath< t_levchar, t_levstr, t_levpath >( *it, splitcompare ),
(const char*)joinpath< t_levchar, t_levstr, t_levpath >( *it, '/' ).c_str() );
return 0;
}
回答3:
//I don't know what path is, I'll pretend it's a std::string
//algorithm is the same, just functions change
const std::vector<boost::path>& services;
const std::string& findme = ???
std::vector<boost::path>::iterator result = services.end();
for(auto i = services.begin(); i != services.end(); ++i) {
//if this path is shorter than the best so far, skip it
if (result == services.end() || i->second.size()>result->second.size()) {
const size_t colonpos = i->second.find(':', 8); //find colon
//if this path is longer than findme, skip it
if (findme.size() >= i->second.size()-colonpos) {
//make sure they match, in _reverse_ order (faster in this case)
int o=i->second.size()-1;
for( ; o>=colonpos; --o) {
if (i->second[o] != findme[o-colonpos])
break;
}
//if it was a match, this is the new best result
if (o < colonpos)
result = i;
}
}
}
//either result is services.end() meaning none matched
//or result is the _longest_ path that matched
Obviously, boost::path
isn't a std::string
, but probably has a member to get a std::string
or similar object, so you'll just have to add that member to i->second
and result->second
来源:https://stackoverflow.com/questions/5997723/having-a-map-with-paths-how-to-compare-tham-to-given-path