问题
Given a seed string, I want to find its neighbors with at most differ in 2 positions. All the digits involve in generating string are only four (i.e. 0,1,2,3). This is the example for what I mean:
# In this example, 'first' column
# are neighbors with only 1 position differ.
# The rest of the columns are 2 positions differ
Seed = 000
100 110 120 130 101 102 103
200 210 220 230 201 202 203
300 310 320 330 301 302 303
010 011 012 013
020 021 022 023
030 031 032 033
001
002
003
Seed = 001
101 111 121 131 100 102 103
201 211 221 231 200 202 203
301 311 321 331 300 302 303
011 010 012 013
021 020 022 023
031 030 032 033
000
003
002
Hence given a tag of length L
we will have 3*L + 9L(L-1)/2 neighbors
But why this code of mine fails to generate it correctly? Especially when the seed string is other than "000".
Other approaches are also welcomed, escpecially with speed improvement. Since we will be processing millions of seed tags of length 34 to 36.
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
using namespace std;
string ConvertInt2String(int IntVal) {
std::string S;
std::stringstream out;
out << IntVal;
S = out.str();
return S;
}
string Vec2Str (vector <int> NTg) {
string StTg = "";
for (unsigned i = 0; i < NTg.size(); i++) {
StTg += ConvertInt2String(NTg[i]);
}
return StTg;
}
template <typename T> void prn_vec(const std::vector < T >&arg, string sep="")
{
for (unsigned n = 0; n < arg.size(); n++) {
cout << arg[n] << sep;
}
return;
}
vector <int> neighbors(vector<int>& arg, int posNo, int baseNo) {
// pass base position and return neighbors
vector <int> transfVec;
transfVec = arg;
//modified according to strager's first post
transfVec[posNo % arg.size()] = baseNo;
return transfVec;
}
int main () {
vector <int> numTag;
numTag.push_back(0);
numTag.push_back(0);
numTag.push_back(1); // If "000" this code works, but not 001 or others
// Note that in actual practice numTag can be greater than 3
int TagLen = static_cast<int>(numTag.size());
for ( int p=0; p< TagLen ; p++ ) {
// First loop is to generate tags 1 position differ
for ( int b=1; b<=3 ; b++ ) {
int bval = b;
if (numTag[p] == b) {
bval = 0;
}
vector <int> nbnumTag = neighbors(numTag, p, bval);
string SnbnumTag = Vec2Str(nbnumTag);
cout << SnbnumTag;
cout << "\n";
// Second loop for tags in 2 position differ
for (int l=p+1; l < TagLen; l++) {
for (int c=1; c<=3; c++) {
int cval = c;
if (nbnumTag[l] == c) {
cval = c;
}
vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
string SnbnumTag2 = Vec2Str(nbnumTag2);
cout << "\t" << SnbnumTag2;
cout << "\n";
}
}
}
}
return 0;
}
回答1:
Would this do it? It enumerates the tree of possible strings, pruning all with >2 differences from the original.
void walk(char* s, int i, int ndiff){
char c = s[i];
if (ndiff > 2) return;
if (c == '\0'){
if (ndiff > 0) print(s);
}
else {
s[i] = '0'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
s[i] = '1'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
s[i] = '2'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
s[i] = '3'; walk(s, i+1, (s[i]==c ? ndiff : ndiff+1);
s[i] = c;
}
}
char seed[] = "000";
main(){
walk(seed, 0, 0);
}
回答2:
Here's one way to do it that should work for any number of characters and length of string:
string base = "000";
char values[] = {'0', '1', '2', '3' };
for (int i = 0; i < base.length(); ++i)
{
for (int j = 0; j < countof(values); ++j)
{
if (base[i] != values[j])
{
string copy = base;
copy[i] = values[j];
cout << copy << endl;
for (int k = i+1; k < base.length(); ++k)
{
for (int l = 0; l < countof(values); ++l)
{
if (copy[k] != values[l])
{
string copy2 = copy;
copy[k] = values[l];
cout << copy2 << endl;
}
}
}
}
}
}
回答3:
This should be equivalent to generating all the strings within a hamming distance of 2, over a 4-symbol alphabet. I've seen algorithms for it, but I'm at a loss to find them right now. Perhaps this can serve as a pointer in the right direction.
回答4:
Your problem [EDIT: the original one (see previous revisions of question)] is that in your inner loop, you're only assigning the 'next' element. A quick fix is to wrap the write in neighbors
:
vector <int> neighbors(const vector<int>& arg, int posNo, int baseNo) {
// pass base position and return neighbors
vector <int> transfVec = arg
transfVec[posNo % arg.size()] = baseNo;
return transfVec;
}
This fix only works when you have two or three items in your array. If you want more, you need to rewrite your algorithm as it doesn't handle cases where the length is greater than three at all. (It shouldn't need to, even. The algorithm you use is just too restrictive.)
回答5:
These two if's:
if (numTag[p] == b) {
bval = 0;
}
if (nbnumTag[l] == c) {
cval = c;
}
Should instead have bodies of continue
.
These two loops should start at 0:
for ( int b=1; b<=3 ; b++ ) {
for (int c=1; c<=3; c++) {
// i.e.
for ( int b=0; b<=3 ; b++ ) {
for (int c=0; c<=3; c++) {
回答6:
It looks like strager has identified the main problem: the loop conditions. Your alphabet is 0,1,2,3, so you should loop over that whole range. 0 is not a special case, as your code tries to treat it. The special case is to skip the iteration when the alphabet value equals the value in your key, which is what the continue suggested by strager accomplishes.
Below is my version of your algorithm. It has some alternative ideas for loop structures, and it avoids copying the key by modifying it in place. Note that you can also change the size of the alphabet by changing the MIN_VALUE
and MAX_VALUE
constants.
Here's the output for the "001" case:
101 111 121 131 102 103 100
201 211 221 231 202 203 200
301 311 321 331 302 303 300
011 012 013 010
021 022 023 020
031 032 033 030
002
003
000
And here's the code:
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
using namespace std;
const int MIN_VALUE = 0;
const int MAX_VALUE = 3;
int increment(int& ch)
{
if (ch == MAX_VALUE)
ch = MIN_VALUE;
else
++ch;
return ch;
}
string stringKey(const vector<int>& key)
{
ostringstream sout;
for (int i = 0; i < key.size(); ++i)
sout << key[i];
return sout.str();
}
int main()
{
vector<int> key;
key.push_back(0);
key.push_back(0);
key.push_back(1);
for (int outerKeyPos = 0; outerKeyPos < key.size(); ++outerKeyPos)
{
int outerOriginal = key[outerKeyPos];
while (increment(key[outerKeyPos]) != outerOriginal)
{
cout << stringKey(key);
for (int innerKeyPos = outerKeyPos + 1; innerKeyPos < key.size(); ++innerKeyPos)
{
int innerOriginal = key[innerKeyPos];
while (increment(key[innerKeyPos]) != innerOriginal)
{
cout << " " << stringKey(key);
}
}
cout << endl;
}
}
}
回答7:
I've tried to correct your algorithm, staying as close as possible to the original one:
int TagLen = static_cast<int>(numTag.size());
for ( int p=0; p< TagLen ; p++ ) {
// First loop is to generate tags 1 position differ
for ( int b=0; b<=3 ; b++ ) { // Loop over all 4 elements
int bval = b;
if (numTag[p] == b) {
continue; // This is the seed vector, ignore it
}
vector <int> nbnumTag = neighbors(numTag, p, bval);
string SnbnumTag = Vec2Str(nbnumTag);
cout << SnbnumTag;
cout << "\n";
// Second loop for tags in 2 position differ
for (int l=p+1; l < TagLen; l++) {
for (int c=0; c<=3; c++) {
int cval = c;
if (nbnumTag[l] == c) { // Loop over all 4 elements
continue; // This is nbnumTag, ignore it
}
vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
string SnbnumTag2 = Vec2Str(nbnumTag2);
cout << "\t" << SnbnumTag2;
cout << "\n";
}
}
}
}
The problem is that you don't iterate over all 4 possible values (0,1,2,3), but you skip 0 for some reason. The way I am doing it is to iterate over all of them and ignore (by using a continue) the vector that is the same with the seed or the 1-point different tag computed at phase 1.
Having said that, I believe that better algorithms than yours are proposed and it would be better to consider one of them.
回答8:
Here's my ugly, hacky solution:
#include <iostream>
#include <vector>
using std::cout;
using std::endl;
using std::vector;
struct tri
{
tri(int a, int b, int c)
{
switch (a)
{
case 0:
m[0] = 0;
m[1] = b;
m[2] = c;
break;
case 1:
m[0] = b;
m[1] = 0;
m[2] = c;
break;
case 2:
m[0] = b;
m[1] = c;
m[2] = 0;
break;
}
}
int m[3];
};
int main()
{
vector<tri> v;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 4; j++)
for (int k = 0; k < 4; k++)
{
v.push_back(tri(i,j,k));
}
vector<tri>::iterator it;
for (it = v.begin(); it != v.end(); ++it)
{
cout << (*it).m[0];
cout << (*it).m[1];
cout << (*it).m[2];
cout << endl;
}
}
来源:https://stackoverflow.com/questions/680011/finding-strings-neighbors-by-up-to-2-differing-positions