My task is to check (trillions of times) whether two ints contain any of the predefined pairs of nibbles in corresponding nibble positions (first pair: 0x2 and 0x7; second pair: 0xd and 0x8). For example:
bit offset:
The fastest solution is probably to use some kind of lookup table.
How constrained are you on memory? A 16-bit table would be 64 KB and would let you test 4 nibbles at once. So 4 of them (one for each nibble value) would be 256 KB.
If I understand your problem correctly, I think this will work. It's an 8-bit example; you can expand it to 16 bits:
/*
 * Look for 0x2 in either nibble of a byte.
 *
 * table_0x2[b] is 1 when byte value b has 0x2 in its low nibble, its high
 * nibble, or both (0x02, 0x12, ..., 0xf2 and 0x20..0x2f); otherwise it is 0.
 * The other nibble may hold any value, so 31 entries are set in total.
 * Each row below is one high-nibble value; each column is a low-nibble value.
 */
char table_0x2[] = {
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0_: 0x02 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x1_: 0x12 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x2_: 0x20..0x2f */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x3_: 0x32 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x4_: 0x42 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x5_: 0x52 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x6_: 0x62 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x7_: 0x72 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x8_: 0x82 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x9_: 0x92 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa_: 0xa2 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb_: 0xb2 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc_: 0xc2 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd_: 0xd2 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe_: 0xe2 */
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* 0xf_: 0xf2 */
};
/*
 * Look for 0x7 in either nibble of a byte.
 *
 * table_0x7[b] is 1 when byte value b has 0x7 in its low nibble, its high
 * nibble, or both (0x07, 0x17, ..., 0xf7 and 0x70..0x7f); otherwise it is 0.
 * Each row below is one high-nibble value; each column is a low-nibble value.
 */
char table_0x7[] = {
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0_: 0x07 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x1_: 0x17 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x2_: 0x27 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x3_: 0x37 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x4_: 0x47 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x5_: 0x57 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x6_: 0x67 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x7_: 0x70..0x7f */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x8_: 0x87 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x9_: 0x97 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xa_: 0xa7 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xb_: 0xb7 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xc_: 0xc7 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xd_: 0xd7 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xe_: 0xe7 */
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0  /* 0xf_: 0xf7 */
};
/*
 * Look for 0xd in either nibble of a byte.
 *
 * table_0xd[b] is 1 when byte value b has 0xd in its low nibble, its high
 * nibble, or both (0x0d, 0x1d, ..., 0xfd and 0xd0..0xdf); otherwise it is 0.
 * Each row below is one high-nibble value; each column is a low-nibble value.
 */
char table_0xd[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x0_: 0x0d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x1_: 0x1d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x2_: 0x2d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x3_: 0x3d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x4_: 0x4d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x5_: 0x5d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x6_: 0x6d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x7_: 0x7d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x8_: 0x8d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0x9_: 0x9d */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0xa_: 0xad */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0xb_: 0xbd */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0xc_: 0xcd */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xd_: 0xd0..0xdf */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 0xe_: 0xed */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0  /* 0xf_: 0xfd */
};
/*
 * Look for 0x8 in either nibble of a byte.
 *
 * table_0x8[b] is 1 when byte value b has 0x8 in its low nibble, its high
 * nibble, or both (0x08, 0x18, ..., 0xf8 and 0x80..0x8f); otherwise it is 0.
 * Each row below is one high-nibble value; each column is a low-nibble value.
 */
char table_0x8[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x0_: 0x08 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x1_: 0x18 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x2_: 0x28 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x3_: 0x38 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x4_: 0x48 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x5_: 0x58 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x6_: 0x68 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x7_: 0x78 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x8_: 0x80..0x8f */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0x9_: 0x98 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0xa_: 0xa8 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0xb_: 0xb8 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0xc_: 0xc8 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0xd_: 0xd8 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /* 0xe_: 0xe8 */
    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0  /* 0xf_: 0xf8 */
};
/*
 * Report whether A and B hold one of the predefined nibble pairs in the
 * same nibble position: (A nibble 0x2, B nibble 0x7) or
 * (A nibble 0xd, B nibble 0x8).
 *
 * The words are scanned one byte at a time, least significant byte first.
 * The per-byte tables (table_0x2, table_0x7, table_0xd, table_0x8) act as a
 * cheap pre-filter: they only say that a byte contains the nibble somewhere,
 * so a table hit must still be confirmed nibble-by-nibble.
 *
 * Returns 1 on the first confirmed match, 0 when no position matches.
 */
int nibble_check (uint32_t A, uint32_t B)
{
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t a = A & 0xff;
        uint32_t b = B & 0xff;

        if ((table_0x2[a] && table_0x7[b]) ||
            (table_0xd[a] && table_0x8[b])) {
            uint32_t a_lo = a & 0x0f;
            uint32_t a_hi = a >> 4;
            uint32_t b_lo = b & 0x0f;
            uint32_t b_hi = b >> 4;

            /*
             * The tables can hit on different nibbles of the two bytes
             * (e.g. A = 0x02, B = 0x70), so require the pair to sit in
             * corresponding nibbles before reporting a match.
             */
            if ((a_lo == 0x2 && b_lo == 0x7) ||
                (a_hi == 0x2 && b_hi == 0x7) ||
                (a_lo == 0xd && b_lo == 0x8) ||
                (a_hi == 0xd && b_hi == 0x8)) {
                return 1;
            }
        }
        A = A >> 8;
        B = B >> 8;
    }
    return 0;
}
Here's a better implementation:
/* 16 bit tables - upper 8 bits are A, lower 8 bits are B */
/* for 0x02, 0x07 */
char *table_2_7;
/* for 0x0d, 0x08 */
char *table_d_8;

/*
 * Allocate and populate both 64K lookup tables.
 *
 * A table index is (A_byte << 8) | B_byte. An entry is 1 when the pair is
 * present in corresponding nibbles of the two bytes, i.e. both low nibbles
 * or both high nibbles; every other entry is 0.
 *
 * Tables left over from an earlier call are released first, so calling
 * init() again does not leak. If either allocation fails the process is
 * aborted, because the tables must not be dereferenced as NULL later.
 */
void init(void)
{
    enum { TABLE_SIZE = 1 << 16 };
    int i;
    int j;

    /* File-scope pointers start out NULL, and free(NULL) is a no-op. */
    free(table_2_7);
    free(table_d_8);

    /* calloc hands back zero-filled memory, so no separate memset. */
    table_2_7 = calloc(TABLE_SIZE, sizeof *table_2_7);
    table_d_8 = calloc(TABLE_SIZE, sizeof *table_d_8);
    if (table_2_7 == NULL || table_d_8 == NULL) {
        abort();
    }

    /* i and j range over the "don't care" nibbles of A and B. */
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j++) {
            /* pair in the low nibbles of A and B */
            table_2_7[(i << 12) | (0x2 << 8) | (j << 4) | (0x7 << 0)] = 1;
            /* pair in the high nibbles of A and B */
            table_2_7[(0x2 << 12) | (i << 8) | (0x7 << 4) | (j << 0)] = 1;
            table_d_8[(i << 12) | (0xd << 8) | (j << 4) | (0x8 << 0)] = 1;
            table_d_8[(0xd << 12) | (i << 8) | (0x8 << 4) | (j << 0)] = 1;
        }
    }
}
/*
 * Test A and B one byte lane at a time, least significant byte first.
 *
 * For each lane the byte of A (upper 8 bits) and the byte of B (lower
 * 8 bits) are combined into a 16-bit index into table_2_7 and table_d_8.
 * Returns 1 as soon as either table holds a non-zero entry for a lane,
 * and 0 when none of the four lanes hits.
 *
 * Both tables must already be allocated and filled (see init()).
 */
int nibble_check(uint32_t A, uint32_t B)
{
    int lane = 0;

    while (lane < 4) {
        /* A's byte selects the upper half of the index, B's the lower. */
        const uint32_t idx = ((A & 0xff) << 8) | (B & 0xff);

        if (table_2_7[idx] != 0) {
            return 1;
        }
        if (table_d_8[idx] != 0) {
            return 1;
        }

        /* Move on to the next more significant byte of each word. */
        A >>= 8;
        B >>= 8;
        lane++;
    }

    return 0;
}