I am interested in the optimal way of calculating the number of bits set in a byte, along these lines:
template< unsigned char byte > class BITS_SET
{
pub
#include <algorithm> // for std::fill_n
#include <climits> // for CHAR_BIT (most likely to be 8)
#include <cstring> // for memset
#include <iostream>
#include <new>
// Sentinel stored in a lookup-table slot whose bit count has not been computed yet.
static const int DUMMY = -1;
// First approach: a lookup table that is filled in lazily. The first query
// for a given byte value pays for one O(CHAR_BIT) scan; every later query for
// that same value is a plain O(1) table read.
class bitsInByteflyLUT
{
    typedef unsigned char byte;

    // Cache indexed by byte value; allocated in the constructor.
    int* flyLUT;

    // Counts the bits that are ON in _byte with an O(CHAR_BIT) scan.
    // The answer lies between 0 and CHAR_BIT.
    int bitsInByte(byte _byte);

    // Copy construction and assignment are forbidden: both are private.
    bitsInByteflyLUT(const bitsInByteflyLUT & _class);
    const bitsInByteflyLUT & operator= (const bitsInByteflyLUT& _class);

public:
    bitsInByteflyLUT();             // constructor - throws std::bad_alloc
    ~bitsInByteflyLUT();            // destructor
    int Get_bitsInByte(byte _byte); // cached bit count of _byte
};
// Allocates one cache slot per possible byte value (2^CHAR_BIT slots) and
// marks every slot as "not computed yet" by storing DUMMY in it.
// Throws std::bad_alloc if the allocation fails; the exception simply
// propagates from new[], so no try/catch is needed here.
bitsInByteflyLUT::bitsInByteflyLUT()
{
    const size_t nIndexes = static_cast<size_t>(1) << CHAR_BIT;
    flyLUT = new int[nIndexes];
    // Fill element by element. A byte-wise memset only yields the intended
    // int value when every byte of the sentinel is identical (as with -1),
    // so it would silently break if DUMMY were ever changed.
    std::fill_n(flyLUT, nIndexes, DUMMY);
}
// Frees the lookup table that the constructor allocated with new[].
bitsInByteflyLUT::~bitsInByteflyLUT()
{
delete[] flyLUT;
}
// Returns the number of set bits in _byte.
// The first request for a given value computes the count and caches it;
// later requests for the same value are served straight from the cache.
int bitsInByteflyLUT::Get_bitsInByte(byte _byte)
{
    int& cached = flyLUT[_byte];
    if (cached != DUMMY)
    {
        return cached; // already computed earlier: O(1)
    }
    cached = bitsInByte(_byte); // first request for this value: O(CHAR_BIT)
    return cached;
}
// Counts the set bits of _byte by testing each of its CHAR_BIT bit positions
// in turn, from the least significant bit upwards.
int bitsInByteflyLUT::bitsInByte(byte _byte)
{
    byte setBits = 0;
    for (int position = 0; position < CHAR_BIT; ++position)
    {
        const byte probe = static_cast<byte>(1u << position);
        if (_byte & probe)
        {
            ++setBits;
        }
    }
    return setBits;
}
int main ()
{
using std::cout;
using std::endl;
bitsInByteflyLUT flut;
for (unsigned int i = 0; i < (1 << CHAR_BIT); i += 1)
{
cout << i << " " << flut.Get_bitsInByte(i) << endl;
}
return 0;
}
For just a single byte value, the fastest way is to store the answers in a 256-byte array that you index with the value. For example, bits_set[] = {0, 1, 1, 2, ...}
Why not just use the standard library? That way, the optimal method is chosen by the implementation, and it is likely better than any standards-compliant code that you could write yourself. For instance, on x86 this compiles to a single instruction, but only if you're targeting CPUs that support it.
#include <bitset>
#include <iostream>
// Prints the number of set bits in a sample byte using std::bitset::count().
int main() {
    const unsigned char bitfield = 17;
    const std::bitset<8> bits(bitfield); // view the byte as 8 individual bits
    std::cout << bits.count() << std::endl;
}
#include <ctime>
#include <iostream>
using namespace std;
// Returns the number of set bits in byte.
// Repeatedly adds the lowest bit to the tally and shifts the value right
// until no set bits remain.
int count1s(unsigned char byte) {
    int ones = 0;
    for (; byte != 0; byte >>= 1) {
        ones += byte & 0x01;
    }
    return ones;
}
// Returns the number of set bits in byte via a precomputed 256-entry table.
// The table is laid out as 16 rows of 16 entries: the row is selected by the
// high nibble and the column by the low nibble of the index.
int count1s2(unsigned char byte) {
    static const int kBitCount[256] = {
        /* 0x0_ */ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        /* 0x1_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        /* 0x2_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        /* 0x3_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0x4_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        /* 0x5_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0x6_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0x7_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        /* 0x8_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        /* 0x9_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0xA_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0xB_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        /* 0xC_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
        /* 0xD_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        /* 0xE_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
        /* 0xF_ */ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
    };
    return kBitCount[byte];
}
// Runs each counting function once on the value 205 and prints its result
// together with the processor time consumed by the call.
// The reported time is the raw difference of two clock() readings, i.e. it is
// expressed in clock ticks; divide by CLOCKS_PER_SEC to obtain seconds.
// NOTE(review): a single call is likely far below the resolution of clock(),
// so the printed time is probably 0 - loop the call if a real measurement is
// wanted.
int main() {
    // clock() returns std::clock_t; keep its readings in that type rather
    // than in time_t, which is an unrelated calendar-time type.
    std::clock_t begin = std::clock();
    int c = count1s(205);
    std::clock_t finish = std::clock();
    std::cout << "count1: " << c << " time: "
              << static_cast<double>(finish - begin) << '\n';

    begin = std::clock();
    c = count1s2(205);
    finish = std::clock();
    std::cout << "count2: " << c << " time: "
              << static_cast<double>(finish - begin) << '\n';
    return 0;
}
// Returns the number of set bits in the object representation of a.
// Each iteration clears the lowest set bit (x & (x - 1)), so the loop runs
// once per set bit.
// The work is done on an unsigned copy: with a signed operand, "a - 1"
// overflows (undefined behaviour) once the value reaches INT_MIN, which
// happens for every negative input.
int count(int a) {
    unsigned int remaining = static_cast<unsigned int>(a);
    int setBits = 0;
    while (remaining != 0) {
        remaining &= remaining - 1; // drop the lowest set bit
        ++setBits;
    }
    return setBits;
}