My task is to check (more than a trillion times) whether two ints contain, at the same nibble position, any of the predefined pairs of nibbles (first pair 0x2, 0x7; second pair 0xd, 0x8). For example:
bit offset:
/*
 * Report whether A and B hold one of the two target nibble pairs at the
 * same nibble position: (A nibble == 0x2 and B nibble == 0x7) or
 * (A nibble == 0xD and B nibble == 0x8).
 *
 * A, B: the two 32-bit words to compare, nibble by nibble (8 positions).
 * Returns 1 if at least one position matches, 0 otherwise. The order of the
 * arguments matters: (0x7, 0x2) is not a match.
 */
static inline __attribute__((always_inline))
int nibble_check(uint32_t A, uint32_t B)
{
// shift x left by n nibbles; the cast keeps the shift in unsigned 32-bit
// arithmetic so that reaching the top nibble never overflows a signed int
#define s(x, n) ((uint32_t)(x) << 4 * (n))
// mask the nth nibble of x
#define m(x, n) ((x) & s(0xf, n))
// 0xD ^ 0x8 and 0x2 ^ 0x7 both equal 5, so test the XOR first: it rejects
// most nibbles with a single compare. Given A ^ B == 5, B == 7 implies
// A == 2 and B == 8 implies A == 0xD, so this is equivalent to
// (A_nibble == 0xD && B_nibble == 0x8) || (A_nibble == 0x2 && B_nibble == 0x7)
#define t(n) (m(AB, n) == s(5, n) && (m(B, n) == s(7, n) || m(B, n) == s(8, n)))
    uint32_t AB = A ^ B;
    return t(0) || t(1) || t(2) || t(3) || t(4) || t(5) || t(6) || t(7);
#undef t
#undef m
#undef s
}