Moderator: Andres Valverde
Dann Corbit wrote:An important thing to think about is that almost all the time your bitboards will be very sparse. Even a full board with no chessmen removed is only half full of bits.
Your random function creates dense bitboards.
It would be good to experiment with both kinds.
Reinhard Scharnagl wrote:Pradu,
On my Pentium 4 my latest version seems to be about 35% faster; I don't know how it compares once customized optimizations are applied.
I have never used bitboards; I just rewrote the routine.
Reinhard.
/* Trial (R. Scharnagl, second idea) */
/* (endian independent form) */
#define msk1 0xEEEEEEEEUL
#define msk2 0xCCCCCCCCUL
#define msk3 0x88888888UL
#define msk4 0x0F0F0F0FUL
/*
 * popCount2 - count the 1 bits of a 64-bit value.
 *
 * Each 32-bit half is reduced nibble-wise: for a nibble with bits a,b,c,d
 * (value 8a+4b+2c+d) subtracting the nibble shifted right by 1, 2 and 3
 * (masked so nothing leaks in from the neighbouring nibble) leaves a+b+c+d.
 * The per-nibble counts of both halves are added (0..8 per nibble), folded
 * into bytes, then into one total.
 *
 * The accumulator is unsigned: the original used a signed int, which made
 * the conversion of values above INT_MAX and the right shifts of negative
 * intermediates implementation-defined.
 *
 * @param b  the 64-bit value to examine
 * @return   number of set bits in b (0..64)
 */
int popCount2(const U64 b)
{
    unsigned buf;
    unsigned acc;

    /* low 32 bits; the mask keeps this correct if unsigned is wider */
    buf = (unsigned)(b & 0xFFFFFFFFUL);
    acc = buf;
    acc -= ((buf &= msk1) >> 1);
    acc -= ((buf &= msk2) >> 2);    /* msk2 is a subset of msk1 */
    acc -= ((buf &= msk3) >> 3);    /* msk3 is a subset of msk2 */

    /* high 32 bits, obtained by shifting: independent of byte order */
    buf = (unsigned)(b >> 32);
    acc += buf;
    acc -= ((buf &= msk1) >> 1);
    acc -= ((buf &= msk2) >> 2);
    acc -= ((buf &= msk3) >> 3);

    acc = (acc & msk4) + ((acc >> 4) & msk4);   /* nibbles -> bytes  */
    acc = (acc & 0xFFFF) + (acc >> 16);         /* fold upper half   */
    acc = (acc & 0xFF) + (acc >> 8);            /* fold upper byte   */
    return (int)acc;
}
Initialized rand Table: 1156 ms
popCount2: 2141 ms (-589497692)
popCount: 4328 ms (-589497692)
#include <assert.h>
/* A chess bitboard: one bit per square, held in a 64-bit unsigned integer. */
typedef unsigned long long bitboard;

/*
 * Prototypes for every population-count variant defined below.
 * tJ_Scharnagl was defined and used without a prototype; it is declared
 * here alongside the other variants.
 */
extern unsigned int t0_and_masks(bitboard x);
extern unsigned int t1_and_masks(bitboard i);
extern unsigned int t2_and_masks(bitboard b);
extern unsigned int t3_rmlsbsub(bitboard n);
extern unsigned int t4_rmlsbmask(bitboard n);
extern unsigned int t5_testlsb(bitboard n);
extern unsigned int t6_testmsb(bitboard n);
extern unsigned int t7_testsignandshift(bitboard n);
extern unsigned int t8_testeachbit(bitboard n);
extern unsigned int t9_testeachbit1shl(bitboard n);
extern unsigned int tA_tableshift(bitboard n);
extern unsigned int tB_tableuchar(bitboard n);
extern unsigned int tC_tableshiftcast(bitboard n);
extern unsigned int tD_itableshift(bitboard n);
extern unsigned int tE_itableuchar(bitboard n);
extern unsigned int tF_itableshiftcast(bitboard n);
extern unsigned int tG_parallel(bitboard n);
extern unsigned int tH_hamming(bitboard w);
extern unsigned int tI_Scharnagl(bitboard b);
extern unsigned int tJ_Scharnagl(bitboard b);
/*
 * t0_and_masks - population count by successive pairwise field sums.
 *
 * Adjacent 1-bit fields are added into 2-bit fields, those into 4-bit
 * fields, and so on up to the two 32-bit halves, which are added last.
 * Asserts that x is non-zero.
 *
 * @param x  the bitboard to count
 * @return   number of set bits in x
 */
unsigned t0_and_masks(bitboard x)
{
    const bitboard odd_bits  = 0x5555555555555555;
    const bitboard bit_pairs = 0x3333333333333333;
    const bitboard nibbles   = 0x0f0f0f0f0f0f0f0f;
    const bitboard bytes     = 0x00ff00ff00ff00ff;
    const bitboard words     = 0x0000ffff0000ffff;

    assert(x);

    x = (x & odd_bits) + ((x >> 1) & odd_bits);
    x = (x & bit_pairs) + ((x >> 2) & bit_pairs);
    /* from here a field cannot overflow, so one mask after the add suffices */
    x = (x + (x >> 4)) & nibbles;
    x = (x + (x >> 8)) & bytes;
    x = (x + (x >> 16)) & words;
    x += x >> 32;

    return (unsigned) (x & 0xff);
}
/*
 * t1_and_masks - mask-and-add population count that switches to 32-bit
 * arithmetic once the per-byte counts are available.
 *
 * Asserts that i is non-zero.
 *
 * @param i  the bitboard to count
 * @return   number of set bits in i
 */
unsigned t1_and_masks(bitboard i)
{
    unsigned folded;

    assert(i);

    /* 1-bit -> 2-bit -> 4-bit -> per-byte counts, still in 64 bits */
    i = ((i >> 1) & 0x5555555555555555) + (i & 0x5555555555555555);
    i = ((i >> 2) & 0x3333333333333333) + (i & 0x3333333333333333);
    i = (i + (i >> 4)) & 0x0f0f0f0f0f0f0f0f;

    /* add the two halves, then fold the four byte counts into the low byte */
    folded = (unsigned) (i + (i >> 32));
    folded += folded >> 16;
    folded += folded >> 8;

    return folded & 0xff;
}
/*
 * t2_and_masks - non-iterative population count of the 1 bits in a
 * 64-bit quadword.
 *
 * The 2-bit and 4-bit field counts are formed in 64-bit arithmetic; the
 * two 32-bit halves are then added and the rest of the reduction is done
 * on a 32-bit value. Unlike most variants in this file it has no assert,
 * so a zero argument simply yields 0.
 *
 * @param b  the quadword to be counted
 * @return   the number of 1 bits in b
 */
#define m1 0x5555555555555555ULL
#define m2 0x3333333333333333ULL
unsigned t2_and_masks(bitboard b)
{
    bitboard pair_counts;
    bitboard nibble_counts;
    unsigned sum;

    /* x - (x >> 1 & 0101..) gives the count of each bit pair directly */
    pair_counts = b - ((b >> 1) & m1);
    nibble_counts = ((pair_counts >> 2) & m2) + (pair_counts & m2);

    /* each nibble is at most 4, so adding the halves cannot overflow one */
    sum = ((unsigned) (nibble_counts >> 32)) + ((unsigned) nibble_counts);
    sum = ((sum >> 4) & 0x0F0F0F0F) + (sum & 0x0F0F0F0F);
    sum = (sum >> 16) + (sum & 0xFFFF);
    sum = (sum >> 8) + (sum & 0xFF);
    return sum;
}
/*
 * t3_rmlsbsub - count the bits in a bitboard by repeatedly subtracting
 * the lowest set bit and counting the trips round the loop.
 *
 * The expression (n & -n) isolates the lowest set bit. bitboard is an
 * unsigned type, so the negation is modular arithmetic; the original
 * note restricted the trick to two's complement machines.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t3_rmlsbsub(bitboard n)
{
    unsigned count = 0;

    assert(n);

    while (n) {
        const bitboard lowest = n & (0 - n);

        n -= lowest;
        ++count;
    }
    return count;
}
/*
 * t4_rmlsbmask - count the bits by clearing the lowest set bit with
 * n & (n - 1) until nothing is left; one loop trip per set bit.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t4_rmlsbmask(bitboard n)
{
    unsigned count = 0;

    assert(n);

    while (n) {
        n = n & (n - 1);    /* take away lsb */
        ++count;
    }
    return count;
}
/*
 * t5_testlsb - count the bits in a bitboard by shifting the value down
 * one place at a time and adding the bottom bit to the total.
 *
 * The loop ends as soon as the remaining value is zero, so the number of
 * trips depends on the position of the highest set bit.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t5_testlsb(bitboard n)
{
    unsigned count = 0;

    assert(n);

    while (n) {
        count += (n & 1);
        n >>= 1;
    }
    return count;
}
/*
 * t6_testmsb - count the bits in a bitboard by moving the value left one
 * place at a time and testing the top bit.
 *
 * The left move is written as n += n; the original author's note says
 * this stands in for a shift because shifts are expensive on many
 * machines. The top-bit mask is derived from the type, not hard-coded.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t6_testmsb(bitboard n)
{
    /* all ones shifted right once, inverted: only the top bit remains */
    const bitboard top_bit = ~(~(bitboard) 0 >> 1);
    unsigned count = 0;

    assert(n);

    while (n) {
        if (n & top_bit)
            ++count;
        n += n;
    }
    return count;
}
/*
 * t7_testsignandshift - count the bits by shifting left and testing the
 * top bit through the sign of the value reinterpreted as long long.
 *
 * NOTE(review): the cast of a value above LLONG_MAX to long long is
 * implementation-defined; the test relies on it producing a negative
 * number when the top bit is set.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t7_testsignandshift(bitboard n)
{
    unsigned count = 0;

    assert(n);

    while (n) {
        if ((long long) n < 0)
            ++count;
        n <<= 1;
    }
    return (count);
}
/*
 * t8_testeachbit - count the bits in a bitboard by testing every bit
 * position with a one-bit probe.
 *
 * The probe starts at 1 and is doubled until it overflows to zero, so
 * the loop adapts to however many bits the type has. The original note
 * calls this the second most intuitively obvious method.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t8_testeachbit(bitboard n)
{
    bitboard probe = 1;
    unsigned count = 0;

    assert(n);

    do {
        if (n & probe)
            ++count;
        probe += probe;     /* next position; becomes 0 after the top bit */
    } while (probe);

    return count;
}
/*
 * t9_testeachbit1shl - count the bits in a bitboard by testing each of
 * the 64 positions with the mask (1 << position).
 *
 * The original note calls this the most intuitively obvious method. The
 * bit count is hard-coded as 64; it could be derived with
 * sizeof(long long) * CHAR_BIT instead.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned t9_testeachbit1shl(bitboard n)
{
    unsigned count = 0;
    unsigned bit = 0;

    assert(n);

    while (bit < 64) {
        const bitboard probe = (bitboard) 1 << bit;

        if (n & probe)
            ++count;
        ++bit;
    }
    return count;
}
/*
 * bits_in_byte[v] is the number of 1 bits in the byte value v (0..255).
 * Each row of 16 entries covers one value of the high nibble; the table
 * is shared by all tA..tF lookup variants below.
 */
static const char bits_in_byte[256] =
{
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
/*
 * tA_tableshift - population count by table lookup: each of the eight
 * bytes is extracted with a shift and a 0xff mask and looked up in
 * bits_in_byte; the eight results are summed.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tA_tableshift(bitboard n)
{
    unsigned total;

    assert(n);

    total  = bits_in_byte[n & 0xff];
    total += bits_in_byte[(n >> 8) & 0xff];
    total += bits_in_byte[(n >> 16) & 0xff];
    total += bits_in_byte[(n >> 24) & 0xff];
    total += bits_in_byte[(n >> 32) & 0xff];
    total += bits_in_byte[(n >> 40) & 0xff];
    total += bits_in_byte[(n >> 48) & 0xff];
    total += bits_in_byte[n >> 56];     /* top byte needs no mask */
    return total;
}
/*
 * tB_tableuchar - population count by table lookup, reading the first
 * eight bytes of the argument's storage through an unsigned char
 * pointer instead of shifting. All eight bytes are summed, so their
 * order in memory does not affect the result.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tB_tableuchar(bitboard n)
{
    unsigned char *bytes = (unsigned char *) &n;
    unsigned total;

    assert(n);

    total  = bits_in_byte[bytes[0]];
    total += bits_in_byte[bytes[1]];
    total += bits_in_byte[bytes[2]];
    total += bits_in_byte[bytes[3]];
    total += bits_in_byte[bytes[4]];
    total += bits_in_byte[bytes[5]];
    total += bits_in_byte[bytes[6]];
    total += bits_in_byte[bytes[7]];
    return total;
}
/*
 * tC_tableshiftcast - population count by table lookup: each byte is
 * extracted by shifting and then casting to unsigned char (the cast
 * takes the place of the 0xff mask used in tA_tableshift).
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tC_tableshiftcast(bitboard n)
{
    unsigned total;

    assert(n);

    total  = bits_in_byte[(unsigned char) n];
    total += bits_in_byte[(unsigned char) (n >> 8)];
    total += bits_in_byte[(unsigned char) (n >> 16)];
    total += bits_in_byte[(unsigned char) (n >> 24)];
    total += bits_in_byte[(unsigned char) (n >> 32)];
    total += bits_in_byte[(unsigned char) (n >> 40)];
    total += bits_in_byte[(unsigned char) (n >> 48)];
    total += bits_in_byte[(unsigned char) (n >> 56)];
    return total;
}
/*
 * tD_itableshift - shift-and-mask table lookup population count.
 *
 * The body performs the same lookups as tA_tableshift on the same
 * bits_in_byte table.
 * NOTE(review): the "i" prefix presumably referred to a separate
 * int-typed table in an earlier version - not visible here; confirm.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tD_itableshift(bitboard n)
{
    unsigned low_half;
    unsigned high_half;

    assert(n);

    low_half  = bits_in_byte[n & 0xff]
              + bits_in_byte[(n >> 8) & 0xff]
              + bits_in_byte[(n >> 16) & 0xff]
              + bits_in_byte[(n >> 24) & 0xff];
    high_half = bits_in_byte[(n >> 32) & 0xff]
              + bits_in_byte[(n >> 40) & 0xff]
              + bits_in_byte[(n >> 48) & 0xff]
              + bits_in_byte[(n >> 56)];
    return low_half + high_half;
}
/*
 * tE_itableuchar - table lookup population count that reads the first
 * eight bytes of the argument through an unsigned char pointer.
 *
 * The body performs the same lookups as tB_tableuchar on the same
 * bits_in_byte table.
 * NOTE(review): the "i" prefix presumably referred to a separate
 * int-typed table in an earlier version - not visible here; confirm.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tE_itableuchar(bitboard n)
{
    unsigned char *raw = (unsigned char *) &n;
    unsigned low_half;
    unsigned high_half;

    assert(n);

    low_half  = bits_in_byte[raw[0]] + bits_in_byte[raw[1]]
              + bits_in_byte[raw[2]] + bits_in_byte[raw[3]];
    high_half = bits_in_byte[raw[4]] + bits_in_byte[raw[5]]
              + bits_in_byte[raw[6]] + bits_in_byte[raw[7]];
    return low_half + high_half;
}
/*
 * tF_itableshiftcast - shift-and-cast table lookup population count.
 *
 * The body performs the same lookups as tC_tableshiftcast on the same
 * bits_in_byte table.
 * NOTE(review): the "i" prefix presumably referred to a separate
 * int-typed table in an earlier version - not visible here; confirm.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tF_itableshiftcast(bitboard n)
{
    unsigned low_half;
    unsigned high_half;

    assert(n);

    low_half  = bits_in_byte[(unsigned char) n]
              + bits_in_byte[(unsigned char) (n >> 8)]
              + bits_in_byte[(unsigned char) (n >> 16)]
              + bits_in_byte[(unsigned char) (n >> 24)];
    high_half = bits_in_byte[(unsigned char) (n >> 32)]
              + bits_in_byte[(unsigned char) (n >> 40)]
              + bits_in_byte[(unsigned char) (n >> 48)]
              + bits_in_byte[(unsigned char) (n >> 56)];
    return low_half + high_half;
}
/*
 * TG_FIELD_MASK(k) selects every other field of width 2^k bits:
 *   k=0 -> 0x5555555555555555   k=3 -> 0x00ff00ff00ff00ff
 *   k=1 -> 0x3333333333333333   k=4 -> 0x0000ffff0000ffff
 *   k=2 -> 0x0f0f0f0f0f0f0f0f   k=5 -> 0x00000000ffffffff
 * It is all-ones divided by (2^(2^k) + 1). The power of two is formed in
 * the bitboard type: the original shifted a plain int, and for k=5 that
 * is a shift by 32, which is undefined for a 32-bit int.
 *
 * TG_MERGE(v, k) adds each field of width 2^k to its upper neighbour.
 */
#define TG_FIELD_MASK(k) \
    (0xffffffffffffffffULL / ((((bitboard) 1) << (1 << (k))) + 1))
#define TG_MERGE(v, k) \
    (((v) & TG_FIELD_MASK(k)) + (((v) >> (1 << (k))) & TG_FIELD_MASK(k)))

/*
 * tG_parallel - population count by six rounds of parallel field sums
 * (1-bit fields up to the two 32-bit halves), with the masks written as
 * compile-time constant expressions.
 * Asserts that n is non-zero.
 *
 * @param n  the bitboard to count
 * @return   number of set bits in n
 */
unsigned tG_parallel(bitboard n)
{
    assert(n);
    n = TG_MERGE(n, 0);
    n = TG_MERGE(n, 1);
    n = TG_MERGE(n, 2);
    n = TG_MERGE(n, 3);
    n = TG_MERGE(n, 4);
    n = TG_MERGE(n, 5);
    return (unsigned) n;
}
#undef TG_MERGE
#undef TG_FIELD_MASK
/*
 * tH_hamming - population count by six mask-and-add rounds, masking
 * both operands in every round (1-bit fields up to the 32-bit halves).
 * Asserts that w is non-zero.
 *
 * @param w  the bitboard to count
 * @return   number of set bits in w
 */
unsigned tH_hamming(bitboard w)
{
    const bitboard mask1  = 0x5555555555555555;
    const bitboard mask2  = 0x3333333333333333;
    const bitboard mask4  = 0x0F0F0F0F0F0F0F0F;
    const bitboard mask8  = 0x00FF00FF00FF00FF;
    const bitboard mask16 = 0x0000FFFF0000FFFF;
    const bitboard mask32 = 0x00000000FFFFFFFF;
    bitboard sum;

    assert(w);

    sum = ((w >> 1) & mask1) + (w & mask1);
    sum = ((sum >> 2) & mask2) + (sum & mask2);
    sum = ((sum >> 4) & mask4) + (sum & mask4);
    sum = ((sum >> 8) & mask8) + (sum & mask8);
    sum = ((sum >> 16) & mask16) + (sum & mask16);
    sum = ((sum >> 32) & mask32) + (sum & mask32);

    return (unsigned) sum;
}
/* Trial (R. Scharnagl, first idea, not optimized) */
#define msk1 0xEEEEEEEE
#define msk2 0xCCCCCCCC
#define msk3 0x88888888
#define msk4 0x0F0F0F0F
/*
 * tI_Scharnagl - population count using nibble-wise subtraction.
 *
 * For a nibble with bits a,b,c,d (value 8a+4b+2c+d), subtracting the
 * nibble shifted right by 1, 2 and 3 - each masked so nothing leaks in
 * from the neighbouring nibble - leaves a+b+c+d. This is done for both
 * 32-bit halves; the per-nibble counts (0..8 after adding the halves)
 * are then folded into bytes and finally into one total.
 *
 * The upper half is taken with a shift. The original read it with
 * ((unsigned *) &b)[1], which only selects the upper half on
 * little-endian machines and accesses the object through an
 * incompatible pointer type.
 *
 * @param b  the bitboard to count (zero is accepted and yields 0)
 * @return   number of set bits in b
 */
unsigned tI_Scharnagl(bitboard b)
{
    unsigned buf;
    unsigned acc;

    /* low 32 bits; the mask keeps this correct if unsigned is wider */
    buf = (unsigned) (b & 0xFFFFFFFF);
    acc = buf - ((buf & msk1) >> 1)
              - ((buf & msk2) >> 2)
              - ((buf & msk3) >> 3);

    /* high 32 bits, independent of byte order */
    buf = (unsigned) (b >> 32);
    acc += buf - ((buf & msk1) >> 1)
               - ((buf & msk2) >> 2)
               - ((buf & msk3) >> 3);

    acc = (acc & msk4) + ((acc >> 4) & msk4);   /* nibbles -> bytes */
    acc = (acc & 0xFFFF) + (acc >> 16);
    acc = (acc & 0xFF) + (acc >> 8);
    return acc;
}
/*
 * tJ_Scharnagl - second copy of the nibble-wise subtraction population
 * count (same algorithm as tI_Scharnagl; the original carried the note
 * "P.S.: might be unsigned" about the return type).
 *
 * Subtracting each nibble's value shifted right by 1, 2 and 3 (masked
 * with msk1..msk3) leaves the nibble's bit count; the counts of both
 * 32-bit halves are added and folded down to a single total.
 *
 * The upper half is taken with a shift. The original read it with
 * ((unsigned *)&b)[1], which only selects the upper half on
 * little-endian machines and accesses the object through an
 * incompatible pointer type.
 *
 * @param b  the bitboard to count (zero is accepted and yields 0)
 * @return   number of set bits in b
 */
unsigned tJ_Scharnagl(bitboard b)
{
    unsigned buf;
    unsigned acc;

    /* low 32 bits; the mask keeps this correct if unsigned is wider */
    buf = (unsigned)(b & 0xFFFFFFFF);
    acc = buf - ((buf & msk1)>>1)
              - ((buf & msk2)>>2)
              - ((buf & msk3)>>3);

    /* high 32 bits, independent of byte order */
    buf = (unsigned)(b >> 32);
    acc += buf - ((buf & msk1)>>1)
               - ((buf & msk2)>>2)
               - ((buf & msk3)>>3);

    acc = (acc & msk4) + ((acc >> 4) & msk4);   /* nibbles -> bytes */
    acc = (acc & 0xFFFF) + (acc >> 16);
    acc = (acc & 0xFF) + (acc >> 8);
    return acc;
}
#ifdef UNIT_TEST
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/timeb.h>
/*
 * getms - current wall-clock time from ftime(), expressed in
 * milliseconds and converted to int.
 *
 * NOTE(review): seconds * 1000 generally does not fit in an int, so the
 * value is only meaningful as a difference between two nearby calls,
 * which is how the callers in this file use it.
 *
 * @return  milliseconds (seconds * 1000 + millisecond part), as int
 */
int getms()
{
    struct timeb now;

    ftime(&now);
    return now.millitm + (now.time * 1000);
}
#define rand64() (((bitboard)rand())^(((bitboard)rand())<<15)^(((bitboard)rand())<<30)^(((bitboard)rand())<<45)^(((bitboard)rand())<<60))
#define TABLE_SIZE 10000000
bitboard randtable[TABLE_SIZE];
typedef unsigned (*bc_func) (bitboard w);
/*
 * time_it - run one bit-count function over every entry of randtable
 * and print the elapsed time as ": <n> ms" (the caller prints the
 * function's name in front of it).
 *
 * @param b  the bit-count function to benchmark
 * @return   the sum of all counts, converted to unsigned
 */
unsigned time_it(bc_func b)
{
    long total = 0;
    int idx = 0;
    int started;

    started = getms();
    while (idx < TABLE_SIZE) {
        total += b(randtable[idx]);
        ++idx;
    }
    printf(": %d ms\n", getms() - started);
    return total;
}
void randomize_nbits(unsigned nbits)
{
unsigned i, j;
int time = getms();
for (i = 0; i < TABLE_SIZE; i++) {
randtable[i] = 0;
for(j = 0; j < nbits; j++)
randtable[i] |= 1 << (rand() % 64);
}
printf("Initialized rand Table for %u bits: %d ms\n", nbits, getms() - time);
}
void runtests(void)
{
unsigned result0;
unsigned result1;
unsigned result2;
unsigned result3;
unsigned result4;
unsigned result5;
unsigned result6;
unsigned result7;
unsigned result8;
unsigned result9;
unsigned resultA;
unsigned resultB;
unsigned resultC;
unsigned resultD;
unsigned resultE;
unsigned resultF;
unsigned resultG;
unsigned resultH;
unsigned resultI;
printf("t0_and_masks");
result0 = time_it(t0_and_masks);
printf("t1_and_masks");
result1 = time_it(t1_and_masks);
printf("t2_and_masks");
result2 = time_it(t2_and_masks);
printf("t3_rmlsbsub");
result3 = time_it(t3_rmlsbsub);
printf("t4_rmlsbmask");
result4 = time_it(t4_rmlsbmask);
printf("t5_testlsb");
result5 = time_it(t5_testlsb);
printf("t6_testmsb");
result6 = time_it(t6_testmsb);
printf("t7_testsignandshift");
result7 = time_it(t7_testsignandshift);
printf("t8_testeachbit");
result8 = time_it(t8_testeachbit);
printf("t9_testeachbit1shl");
result9 = time_it(t9_testeachbit1shl);
printf("tA_tableshift");
resultA = time_it(tA_tableshift);
printf("tB_tableuchar");
resultB = time_it(tB_tableuchar);
printf("tC_tableshiftcast");
resultC = time_it(tC_tableshiftcast);
printf("tD_itableshift");
resultD = time_it(tD_itableshift);
printf("tE_itableuchar");
resultE = time_it(tE_itableuchar);
printf("tF_itableshiftcast");
resultF = time_it(tF_itableshiftcast);
printf("tG_parallel");
resultG = time_it(tG_parallel);
printf("tH_hamming");
resultH = time_it(tH_hamming);
printf("tI_Scharnagl");
resultI = time_it(tI_Scharnagl);
printf("tJ_Scharnagl");
resultI = time_it(tJ_Scharnagl);
}
int main(void)
{
unsigned ut0_and_masks = 0;
unsigned ut1_and_masks = 0;
unsigned ut2_and_masks = 0;
unsigned ut3_rmlsbsub = 0;
unsigned ut4_rmlsbmask = 0;
unsigned ut5_testlsb = 0;
unsigned ut6_testmsb = 0;
unsigned ut7_testsignandshift = 0;
unsigned ut8_testeachbit = 0;
unsigned ut9_testeachbit1shl = 0;
unsigned utA_tableshift = 0;
unsigned utB_tableuchar = 0;
unsigned utC_tableshiftcast = 0;
unsigned utD_itableshift = 0;
unsigned utE_itableuchar = 0;
unsigned utF_itableshiftcast = 0;
unsigned utG_parallel = 0;
unsigned utH_hamming = 0;
unsigned utI_Scharnagl = 0;
unsigned utJ_Scharnagl = 0;
bitboard b;
int i;
int time = getms();
for (i = 0; i < TABLE_SIZE; i++)
randtable[i] = rand64();
printf("Initialized rand Table: %d ms\n", getms() - time);
runtests();
for (i = 1; i < 33; i++)
{
randomize_nbits(i);
runtests();
}
for (i = 0; i < 1000000; i++) {
#ifdef DENSE
b = rand();
b <<= 32;
b |= rand();
#else
b = (1 << (rand() % 32));
b <<= 32;
b |= (1 << (rand() % 64));
#endif
ut0_and_masks = t0_and_masks(b);
ut1_and_masks = t1_and_masks(b);
ut2_and_masks = t2_and_masks(b);
ut3_rmlsbsub = t3_rmlsbsub(b);
ut4_rmlsbmask = t4_rmlsbmask(b);
ut5_testlsb = t5_testlsb(b);
ut6_testmsb = t6_testmsb(b);
ut7_testsignandshift = t7_testsignandshift(b);
ut8_testeachbit = t8_testeachbit(b);
ut9_testeachbit1shl = t9_testeachbit1shl(b);
utA_tableshift = tA_tableshift(b);
utB_tableuchar = tB_tableuchar(b);
utC_tableshiftcast = tC_tableshiftcast(b);
utD_itableshift = tD_itableshift(b);
utE_itableuchar = tE_itableuchar(b);
utF_itableshiftcast = tF_itableshiftcast(b);
utG_parallel = tG_parallel(b);
utH_hamming = tH_hamming(b);
utI_Scharnagl = tI_Scharnagl(b);
utJ_Scharnagl = tJ_Scharnagl(b);
if ((ut0_and_masks != ut1_and_masks) ||
(ut0_and_masks != ut2_and_masks) ||
(ut0_and_masks != ut3_rmlsbsub) ||
(ut0_and_masks != ut4_rmlsbmask) ||
(ut0_and_masks != ut5_testlsb) ||
(ut0_and_masks != ut6_testmsb) ||
(ut0_and_masks != ut7_testsignandshift) ||
(ut0_and_masks != ut8_testeachbit) ||
(ut0_and_masks != ut9_testeachbit1shl) ||
(ut0_and_masks != utA_tableshift) ||
(ut0_and_masks != utB_tableuchar) ||
(ut0_and_masks != utC_tableshiftcast) ||
(ut0_and_masks != utD_itableshift) ||
(ut0_and_masks != utE_itableuchar) ||
(ut0_and_masks != utF_itableshiftcast) ||
(ut0_and_masks != utG_parallel) ||
(ut0_and_masks != utH_hamming) ||
(ut0_and_masks != utI_Scharnagl) ||
(ut0_and_masks != utJ_Scharnagl)
) {
printf("0: %u 1: %u\n", ut0_and_masks, ut1_and_masks);
printf("0: %u 2: %u\n", ut0_and_masks, ut2_and_masks);
printf("0: %u 3: %u\n", ut0_and_masks, ut3_rmlsbsub);
printf("0: %u 4: %u\n", ut0_and_masks, ut4_rmlsbmask);
printf("0: %u 5: %u\n", ut0_and_masks, ut5_testlsb);
printf("0: %u 6: %u\n", ut0_and_masks, ut6_testmsb);
printf("0: %u 7: %u\n", ut0_and_masks, ut7_testsignandshift);
printf("0: %u 8: %u\n", ut0_and_masks, ut8_testeachbit);
printf("0: %u 9: %u\n", ut0_and_masks, ut9_testeachbit1shl);
printf("0: %u A: %u\n", ut0_and_masks, utA_tableshift);
printf("0: %u B: %u\n", ut0_and_masks, utB_tableuchar);
printf("0: %u C: %u\n", ut0_and_masks, utC_tableshiftcast);
printf("0: %u D: %u\n", ut0_and_masks, utD_itableshift);
printf("0: %u E: %u\n", ut0_and_masks, utE_itableuchar);
printf("0: %u F: %u\n", ut0_and_masks, utF_itableshiftcast);
printf("0: %u G: %u\n", ut0_and_masks, utG_parallel);
printf("0: %u H: %u\n", ut0_and_masks, utH_hamming);
printf("0: %u I: %u\n", ut0_and_masks, utI_Scharnagl);
printf("0: %u J: %u\n", ut0_and_masks, utJ_Scharnagl);
}
b = 0xffffffffffffffff;
ut0_and_masks = t0_and_masks(b);
ut1_and_masks = t1_and_masks(b);
ut2_and_masks = t2_and_masks(b);
ut3_rmlsbsub = t3_rmlsbsub(b);
ut4_rmlsbmask = t4_rmlsbmask(b);
ut5_testlsb = t5_testlsb(b);
ut6_testmsb = t6_testmsb(b);
ut7_testsignandshift = t7_testsignandshift(b);
ut8_testeachbit = t8_testeachbit(b);
ut9_testeachbit1shl = t9_testeachbit1shl(b);
utA_tableshift = tA_tableshift(b);
utB_tableuchar = tB_tableuchar(b);
utC_tableshiftcast = tC_tableshiftcast(b);
utD_itableshift = tD_itableshift(b);
utE_itableuchar = tE_itableuchar(b);
utF_itableshiftcast = tF_itableshiftcast(b);
utG_parallel = tG_parallel(b);
utH_hamming = tH_hamming(b);
utI_Scharnagl = tI_Scharnagl(b);
utJ_Scharnagl = tJ_Scharnagl(b);
if ((ut0_and_masks != ut1_and_masks) ||
(ut0_and_masks != ut2_and_masks) ||
(ut0_and_masks != ut3_rmlsbsub) ||
(ut0_and_masks != ut4_rmlsbmask) ||
(ut0_and_masks != ut5_testlsb) ||
(ut0_and_masks != ut6_testmsb) ||
(ut0_and_masks != ut7_testsignandshift) ||
(ut0_and_masks != ut8_testeachbit) ||
(ut0_and_masks != ut9_testeachbit1shl) ||
(ut0_and_masks != utA_tableshift) ||
(ut0_and_masks != utB_tableuchar) ||
(ut0_and_masks != utC_tableshiftcast) ||
(ut0_and_masks != utD_itableshift) ||
(ut0_and_masks != utE_itableuchar) ||
(ut0_and_masks != utF_itableshiftcast) ||
(ut0_and_masks != utG_parallel) ||
(ut0_and_masks != utH_hamming) ||
(ut0_and_masks != utI_Scharnagl) ||
(ut0_and_masks != utJ_Scharnagl)
) {
printf("0: %u 1: %u\n", ut0_and_masks, ut1_and_masks);
printf("0: %u 2: %u\n", ut0_and_masks, ut2_and_masks);
printf("0: %u 3: %u\n", ut0_and_masks, ut3_rmlsbsub);
printf("0: %u 4: %u\n", ut0_and_masks, ut4_rmlsbmask);
printf("0: %u 5: %u\n", ut0_and_masks, ut5_testlsb);
printf("0: %u 6: %u\n", ut0_and_masks, ut6_testmsb);
printf("0: %u 7: %u\n", ut0_and_masks, ut7_testsignandshift);
printf("0: %u 8: %u\n", ut0_and_masks, ut8_testeachbit);
printf("0: %u 9: %u\n", ut0_and_masks, ut9_testeachbit1shl);
printf("0: %u A: %u\n", ut0_and_masks, utA_tableshift);
printf("0: %u B: %u\n", ut0_and_masks, utB_tableuchar);
printf("0: %u C: %u\n", ut0_and_masks, utC_tableshiftcast);
printf("0: %u D: %u\n", ut0_and_masks, utD_itableshift);
printf("0: %u E: %u\n", ut0_and_masks, utE_itableuchar);
printf("0: %u F: %u\n", ut0_and_masks, utF_itableshiftcast);
printf("0: %u G: %u\n", ut0_and_masks, utG_parallel);
printf("0: %u H: %u\n", ut0_and_masks, utH_hamming);
printf("0: %u I: %u\n", ut0_and_masks, utI_Scharnagl);
printf("0: %u J: %u\n", ut0_and_masks, utJ_Scharnagl);
}
b = 1;
ut0_and_masks = t0_and_masks(b);
ut1_and_masks = t1_and_masks(b);
ut2_and_masks = t2_and_masks(b);
ut3_rmlsbsub = t3_rmlsbsub(b);
ut4_rmlsbmask = t4_rmlsbmask(b);
ut5_testlsb = t5_testlsb(b);
ut6_testmsb = t6_testmsb(b);
ut7_testsignandshift = t7_testsignandshift(b);
ut8_testeachbit = t8_testeachbit(b);
ut9_testeachbit1shl = t9_testeachbit1shl(b);
utA_tableshift = tA_tableshift(b);
utB_tableuchar = tB_tableuchar(b);
utC_tableshiftcast = tC_tableshiftcast(b);
utD_itableshift = tD_itableshift(b);
utE_itableuchar = tE_itableuchar(b);
utF_itableshiftcast = tF_itableshiftcast(b);
utG_parallel = tG_parallel(b);
utH_hamming = tH_hamming(b);
utI_Scharnagl = tI_Scharnagl(b);
utJ_Scharnagl = tJ_Scharnagl(b);
if ((ut0_and_masks != ut1_and_masks) ||
(ut0_and_masks != ut2_and_masks) ||
(ut0_and_masks != ut3_rmlsbsub) ||
(ut0_and_masks != ut4_rmlsbmask) ||
(ut0_and_masks != ut5_testlsb) ||
(ut0_and_masks != ut6_testmsb) ||
(ut0_and_masks != ut7_testsignandshift) ||
(ut0_and_masks != ut8_testeachbit) ||
(ut0_and_masks != ut9_testeachbit1shl) ||
(ut0_and_masks != utA_tableshift) ||
(ut0_and_masks != utB_tableuchar) ||
(ut0_and_masks != utC_tableshiftcast) ||
(ut0_and_masks != utD_itableshift) ||
(ut0_and_masks != utE_itableuchar) ||
(ut0_and_masks != utF_itableshiftcast) ||
(ut0_and_masks != utG_parallel) ||
(ut0_and_masks != utH_hamming) ||
(ut0_and_masks != utI_Scharnagl) ||
(ut0_and_masks != utJ_Scharnagl)
) {
printf("0: %u 1: %u\n", ut0_and_masks, ut1_and_masks);
printf("0: %u 2: %u\n", ut0_and_masks, ut2_and_masks);
printf("0: %u 3: %u\n", ut0_and_masks, ut3_rmlsbsub);
printf("0: %u 4: %u\n", ut0_and_masks, ut4_rmlsbmask);
printf("0: %u 5: %u\n", ut0_and_masks, ut5_testlsb);
printf("0: %u 6: %u\n", ut0_and_masks, ut6_testmsb);
printf("0: %u 7: %u\n", ut0_and_masks, ut7_testsignandshift);
printf("0: %u 8: %u\n", ut0_and_masks, ut8_testeachbit);
printf("0: %u 9: %u\n", ut0_and_masks, ut9_testeachbit1shl);
printf("0: %u A: %u\n", ut0_and_masks, utA_tableshift);
printf("0: %u B: %u\n", ut0_and_masks, utB_tableuchar);
printf("0: %u C: %u\n", ut0_and_masks, utC_tableshiftcast);
printf("0: %u D: %u\n", ut0_and_masks, utD_itableshift);
printf("0: %u E: %u\n", ut0_and_masks, utE_itableuchar);
printf("0: %u F: %u\n", ut0_and_masks, utF_itableshiftcast);
printf("0: %u G: %u\n", ut0_and_masks, utG_parallel);
printf("0: %u H: %u\n", ut0_and_masks, utH_hamming);
printf("0: %u I: %u\n", ut0_and_masks, utI_Scharnagl);
printf("0: %u J: %u\n", ut0_and_masks, utJ_Scharnagl);
}
}
return 0;
}
#endif
Initialized rand Table: 500 ms
t0_and_masks: 234 ms
t1_and_masks: 204 ms
t2_and_masks: 140 ms
t3_rmlsbsub: 969 ms
t4_rmlsbmask: 750 ms
t5_testlsb: 1672 ms
t6_testmsb: 8797 ms
t7_testsignandshift: 6860 ms
t8_testeachbit: 8156 ms
t9_testeachbit1shl: 6531 ms
tA_tableshift: 172 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 234 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 1 bits: 141 ms
t0_and_masks: 234 ms
t1_and_masks: 172 ms
t2_and_masks: 140 ms
t3_rmlsbsub: 125 ms
t4_rmlsbmask: 94 ms
t5_testlsb: 563 ms
t6_testmsb: 4265 ms
t7_testsignandshift: 2657 ms
t8_testeachbit: 5766 ms
t9_testeachbit1shl: 3672 ms
tA_tableshift: 171 ms
tB_tableuchar: 204 ms
tC_tableshiftcast: 171 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 234 ms
tI_Scharnagl: 141 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 2 bits: 218 ms
t0_and_masks: 235 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 172 ms
t4_rmlsbmask: 156 ms
t5_testlsb: 703 ms
t6_testmsb: 4844 ms
t7_testsignandshift: 1875 ms
t8_testeachbit: 6735 ms
t9_testeachbit1shl: 4281 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 188 ms
tD_itableshift: 422 ms
tE_itableuchar: 515 ms
tF_itableshiftcast: 188 ms
tG_parallel: 250 ms
tH_hamming: 234 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 3 bits: 1016 ms
t0_and_masks: 1078 ms
t1_and_masks: 328 ms
t2_and_masks: 344 ms
t3_rmlsbsub: 234 ms
t4_rmlsbmask: 203 ms
t5_testlsb: 797 ms
t6_testmsb: 5485 ms
t7_testsignandshift: 1984 ms
t8_testeachbit: 6406 ms
t9_testeachbit1shl: 4469 ms
tA_tableshift: 172 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 187 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 218 ms
tG_parallel: 282 ms
tH_hamming: 250 ms
tI_Scharnagl: 156 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 4 bits: 625 ms
t0_and_masks: 266 ms
t1_and_masks: 187 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 328 ms
t4_rmlsbmask: 266 ms
t5_testlsb: 859 ms
t6_testmsb: 7001 ms
t7_testsignandshift: 3797 ms
t8_testeachbit: 8281 ms
t9_testeachbit1shl: 3906 ms
tA_tableshift: 172 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 5 bits: 547 ms
t0_and_masks: 250 ms
t1_and_masks: 172 ms
t2_and_masks: 171 ms
t3_rmlsbsub: 344 ms
t4_rmlsbmask: 297 ms
t5_testlsb: 906 ms
t6_testmsb: 5735 ms
t7_testsignandshift: 2219 ms
t8_testeachbit: 7062 ms
t9_testeachbit1shl: 4079 ms
tA_tableshift: 218 ms
tB_tableuchar: 313 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 187 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 187 ms
tG_parallel: 266 ms
tH_hamming: 297 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 6 bits: 625 ms
t0_and_masks: 250 ms
t1_and_masks: 187 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 421 ms
t4_rmlsbmask: 344 ms
t5_testlsb: 1594 ms
t6_testmsb: 6797 ms
t7_testsignandshift: 2485 ms
t8_testeachbit: 5468 ms
t9_testeachbit1shl: 4797 ms
tA_tableshift: 172 ms
tB_tableuchar: 219 ms
tC_tableshiftcast: 188 ms
tD_itableshift: 171 ms
tE_itableuchar: 204 ms
tF_itableshiftcast: 187 ms
tG_parallel: 281 ms
tH_hamming: 266 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 297 ms
Initialized rand Table for 7 bits: 1000 ms
t0_and_masks: 265 ms
t1_and_masks: 188 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 485 ms
t4_rmlsbmask: 390 ms
t5_testlsb: 1453 ms
t6_testmsb: 6079 ms
t7_testsignandshift: 2578 ms
t8_testeachbit: 6156 ms
t9_testeachbit1shl: 4375 ms
tA_tableshift: 188 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 266 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 265 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 157 ms
Initialized rand Table for 8 bits: 968 ms
t0_and_masks: 282 ms
t1_and_masks: 156 ms
t2_and_masks: 172 ms
t3_rmlsbsub: 515 ms
t4_rmlsbmask: 422 ms
t5_testlsb: 1000 ms
t6_testmsb: 7376 ms
t7_testsignandshift: 3281 ms
t8_testeachbit: 6047 ms
t9_testeachbit1shl: 4610 ms
tA_tableshift: 203 ms
tB_tableuchar: 328 ms
tC_tableshiftcast: 281 ms
tD_itableshift: 250 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 203 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 9 bits: 953 ms
t0_and_masks: 250 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 563 ms
t4_rmlsbmask: 469 ms
t5_testlsb: 1046 ms
t6_testmsb: 9032 ms
t7_testsignandshift: 3906 ms
t8_testeachbit: 6063 ms
t9_testeachbit1shl: 4859 ms
tA_tableshift: 172 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 219 ms
tD_itableshift: 187 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 188 ms
tG_parallel: 250 ms
tH_hamming: 265 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 453 ms
Initialized rand Table for 10 bits: 1344 ms
t0_and_masks: 250 ms
t1_and_masks: 860 ms
t2_and_masks: 406 ms
t3_rmlsbsub: 797 ms
t4_rmlsbmask: 734 ms
t5_testlsb: 1125 ms
t6_testmsb: 7250 ms
t7_testsignandshift: 3313 ms
t8_testeachbit: 8094 ms
t9_testeachbit1shl: 6016 ms
tA_tableshift: 187 ms
tB_tableuchar: 563 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 187 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 172 ms
tG_parallel: 281 ms
tH_hamming: 250 ms
tI_Scharnagl: 157 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 11 bits: 1172 ms
t0_and_masks: 250 ms
t1_and_masks: 171 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 687 ms
t4_rmlsbmask: 1500 ms
t5_testlsb: 2047 ms
t6_testmsb: 7594 ms
t7_testsignandshift: 3578 ms
t8_testeachbit: 6188 ms
t9_testeachbit1shl: 4844 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 859 ms
tF_itableshiftcast: 157 ms
tG_parallel: 281 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 12 bits: 1250 ms
t0_and_masks: 578 ms
t1_and_masks: 359 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 609 ms
t4_rmlsbmask: 531 ms
t5_testlsb: 1360 ms
t6_testmsb: 7672 ms
t7_testsignandshift: 3344 ms
t8_testeachbit: 6687 ms
t9_testeachbit1shl: 5157 ms
tA_tableshift: 156 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 188 ms
tD_itableshift: 172 ms
tE_itableuchar: 187 ms
tF_itableshiftcast: 172 ms
tG_parallel: 234 ms
tH_hamming: 250 ms
tI_Scharnagl: 141 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 13 bits: 1344 ms
t0_and_masks: 234 ms
t1_and_masks: 234 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 656 ms
t4_rmlsbmask: 563 ms
t5_testlsb: 1078 ms
t6_testmsb: 6484 ms
t7_testsignandshift: 2922 ms
t8_testeachbit: 6813 ms
t9_testeachbit1shl: 4844 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 188 ms
tD_itableshift: 171 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 234 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 14 bits: 1438 ms
t0_and_masks: 234 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 703 ms
t4_rmlsbmask: 563 ms
t5_testlsb: 1203 ms
t6_testmsb: 8313 ms
t7_testsignandshift: 3531 ms
t8_testeachbit: 9063 ms
t9_testeachbit1shl: 5016 ms
tA_tableshift: 218 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 625 ms
tD_itableshift: 312 ms
tE_itableuchar: 204 ms
tF_itableshiftcast: 171 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 15 bits: 1719 ms
t0_and_masks: 266 ms
t1_and_masks: 187 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 782 ms
t4_rmlsbmask: 609 ms
t5_testlsb: 1234 ms
t6_testmsb: 7219 ms
t7_testsignandshift: 3078 ms
t8_testeachbit: 6813 ms
t9_testeachbit1shl: 4438 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 187 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 235 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 16 bits: 1625 ms
t0_and_masks: 235 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 734 ms
t4_rmlsbmask: 625 ms
t5_testlsb: 1172 ms
t6_testmsb: 7766 ms
t7_testsignandshift: 3313 ms
t8_testeachbit: 8922 ms
t9_testeachbit1shl: 4953 ms
tA_tableshift: 203 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 188 ms
tD_itableshift: 188 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 203 ms
tG_parallel: 266 ms
tH_hamming: 234 ms
tI_Scharnagl: 141 ms
tJ_Scharnagl: 140 ms
Initialized rand Table for 17 bits: 2000 ms
t0_and_masks: 360 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 797 ms
t4_rmlsbmask: 781 ms
t5_testlsb: 1188 ms
t6_testmsb: 6843 ms
t7_testsignandshift: 3157 ms
t8_testeachbit: 6469 ms
t9_testeachbit1shl: 5328 ms
tA_tableshift: 187 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 234 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 18 bits: 1844 ms
t0_and_masks: 234 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 797 ms
t4_rmlsbmask: 657 ms
t5_testlsb: 1187 ms
t6_testmsb: 7813 ms
t7_testsignandshift: 3766 ms
t8_testeachbit: 6343 ms
t9_testeachbit1shl: 4813 ms
tA_tableshift: 203 ms
tB_tableuchar: 235 ms
tC_tableshiftcast: 187 ms
tD_itableshift: 188 ms
tE_itableuchar: 250 ms
tF_itableshiftcast: 297 ms
tG_parallel: 375 ms
tH_hamming: 250 ms
tI_Scharnagl: 203 ms
tJ_Scharnagl: 140 ms
Initialized rand Table for 19 bits: 2391 ms
t0_and_masks: 234 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 922 ms
t4_rmlsbmask: 704 ms
t5_testlsb: 1250 ms
t6_testmsb: 6937 ms
t7_testsignandshift: 5203 ms
t8_testeachbit: 7438 ms
t9_testeachbit1shl: 4641 ms
tA_tableshift: 156 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 156 ms
tE_itableuchar: 204 ms
tF_itableshiftcast: 171 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 20 bits: 2016 ms
t0_and_masks: 234 ms
t1_and_masks: 156 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 843 ms
t4_rmlsbmask: 688 ms
t5_testlsb: 1219 ms
t6_testmsb: 6703 ms
t7_testsignandshift: 3453 ms
t8_testeachbit: 6563 ms
t9_testeachbit1shl: 5906 ms
tA_tableshift: 172 ms
tB_tableuchar: 219 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 187 ms
tF_itableshiftcast: 172 ms
tG_parallel: 266 ms
tH_hamming: 281 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 21 bits: 2515 ms
t0_and_masks: 235 ms
t1_and_masks: 172 ms
t2_and_masks: 140 ms
t3_rmlsbsub: 953 ms
t4_rmlsbmask: 766 ms
t5_testlsb: 1563 ms
t6_testmsb: 7531 ms
t7_testsignandshift: 3781 ms
t8_testeachbit: 6782 ms
t9_testeachbit1shl: 5000 ms
tA_tableshift: 172 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 187 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 22 bits: 2297 ms
t0_and_masks: 250 ms
t1_and_masks: 171 ms
t2_and_masks: 157 ms
t3_rmlsbsub: 937 ms
t4_rmlsbmask: 766 ms
t5_testlsb: 1875 ms
t6_testmsb: 7297 ms
t7_testsignandshift: 3359 ms
t8_testeachbit: 7048 ms
t9_testeachbit1shl: 4859 ms
tA_tableshift: 156 ms
tB_tableuchar: 375 ms
tC_tableshiftcast: 203 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 203 ms
tG_parallel: 250 ms
tH_hamming: 266 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 23 bits: 2469 ms
t0_and_masks: 266 ms
t1_and_masks: 203 ms
t2_and_masks: 203 ms
t3_rmlsbsub: 969 ms
t4_rmlsbmask: 1125 ms
t5_testlsb: 1406 ms
t6_testmsb: 6953 ms
t7_testsignandshift: 3266 ms
t8_testeachbit: 6110 ms
t9_testeachbit1shl: 4859 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 157 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 24 bits: 2375 ms
t0_and_masks: 234 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 938 ms
t4_rmlsbmask: 1156 ms
t5_testlsb: 1328 ms
t6_testmsb: 7313 ms
t7_testsignandshift: 3422 ms
t8_testeachbit: 6766 ms
t9_testeachbit1shl: 6406 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 234 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 25 bits: 2500 ms
t0_and_masks: 250 ms
t1_and_masks: 156 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 969 ms
t4_rmlsbmask: 797 ms
t5_testlsb: 1265 ms
t6_testmsb: 6688 ms
t7_testsignandshift: 3235 ms
t8_testeachbit: 6062 ms
t9_testeachbit1shl: 4703 ms
tA_tableshift: 360 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 187 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 187 ms
tG_parallel: 266 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 26 bits: 2781 ms
t0_and_masks: 250 ms
t1_and_masks: 188 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 1110 ms
t4_rmlsbmask: 1015 ms
t5_testlsb: 1422 ms
t6_testmsb: 6969 ms
t7_testsignandshift: 3922 ms
t8_testeachbit: 8141 ms
t9_testeachbit1shl: 6703 ms
tA_tableshift: 188 ms
tB_tableuchar: 406 ms
tC_tableshiftcast: 328 ms
tD_itableshift: 328 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 27 bits: 3172 ms
t0_and_masks: 250 ms
t1_and_masks: 172 ms
t2_and_masks: 140 ms
t3_rmlsbsub: 1078 ms
t4_rmlsbmask: 829 ms
t5_testlsb: 1734 ms
t6_testmsb: 7844 ms
t7_testsignandshift: 3234 ms
t8_testeachbit: 6219 ms
t9_testeachbit1shl: 4704 ms
tA_tableshift: 187 ms
tB_tableuchar: 188 ms
tC_tableshiftcast: 171 ms
tD_itableshift: 172 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 187 ms
tG_parallel: 250 ms
tH_hamming: 235 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 28 bits: 3844 ms
t0_and_masks: 250 ms
t1_and_masks: 171 ms
t2_and_masks: 250 ms
t3_rmlsbsub: 1188 ms
t4_rmlsbmask: 875 ms
t5_testlsb: 1344 ms
t6_testmsb: 8109 ms
t7_testsignandshift: 4594 ms
t8_testeachbit: 7657 ms
t9_testeachbit1shl: 5265 ms
tA_tableshift: 157 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 187 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 29 bits: 3562 ms
t0_and_masks: 266 ms
t1_and_masks: 437 ms
t2_and_masks: 141 ms
t3_rmlsbsub: 2391 ms
t4_rmlsbmask: 1672 ms
t5_testlsb: 3359 ms
t6_testmsb: 7985 ms
t7_testsignandshift: 3484 ms
t8_testeachbit: 6578 ms
t9_testeachbit1shl: 6860 ms
tA_tableshift: 781 ms
tB_tableuchar: 203 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 375 ms
tE_itableuchar: 188 ms
tF_itableshiftcast: 187 ms
tG_parallel: 235 ms
tH_hamming: 765 ms
tI_Scharnagl: 141 ms
tJ_Scharnagl: 141 ms
Initialized rand Table for 30 bits: 3375 ms
t0_and_masks: 765 ms
t1_and_masks: 422 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 1813 ms
t4_rmlsbmask: 1062 ms
t5_testlsb: 1391 ms
t6_testmsb: 6844 ms
t7_testsignandshift: 3172 ms
t8_testeachbit: 5953 ms
t9_testeachbit1shl: 5047 ms
tA_tableshift: 157 ms
tB_tableuchar: 500 ms
tC_tableshiftcast: 484 ms
tD_itableshift: 172 ms
tE_itableuchar: 187 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 235 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 31 bits: 3063 ms
t0_and_masks: 250 ms
t1_and_masks: 156 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 1063 ms
t4_rmlsbmask: 890 ms
t5_testlsb: 1344 ms
t6_testmsb: 8266 ms
t7_testsignandshift: 6344 ms
t8_testeachbit: 7235 ms
t9_testeachbit1shl: 5125 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 250 ms
tE_itableuchar: 187 ms
tF_itableshiftcast: 188 ms
tG_parallel: 250 ms
tH_hamming: 250 ms
tI_Scharnagl: 125 ms
tJ_Scharnagl: 125 ms
Initialized rand Table for 32 bits: 3172 ms
t0_and_masks: 359 ms
t1_and_masks: 172 ms
t2_and_masks: 156 ms
t3_rmlsbsub: 1329 ms
t4_rmlsbmask: 984 ms
t5_testlsb: 1453 ms
t6_testmsb: 6672 ms
t7_testsignandshift: 3219 ms
t8_testeachbit: 5969 ms
t9_testeachbit1shl: 4547 ms
tA_tableshift: 172 ms
tB_tableuchar: 187 ms
tC_tableshiftcast: 172 ms
tD_itableshift: 172 ms
tE_itableuchar: 203 ms
tF_itableshiftcast: 172 ms
tG_parallel: 250 ms
tH_hamming: 235 ms
tI_Scharnagl: 140 ms
tJ_Scharnagl: 125 ms
/* Trial (R. Scharnagl, second idea), endian independent form. */
/* Counts the set bits of a 64-bit bitboard.  The two halves are obtained */
/* by cast and by shift, so no assumption is made about byte order. */
/* NOTE(review): the arithmetic assumes `unsigned` is 32 bits wide - confirm */
/* for the target compiler. */
unsigned tK_Scharnagl(bitboard b)
{
    unsigned lo = (unsigned)b;          /* lower half of the board */
    unsigned hi = (unsigned)(b >> 32);  /* upper half of the board */
    unsigned sum;

    /* Lower half: after the three subtractions every nibble of sum */
    /* holds the number of bits that were set in that nibble. */
    sum = lo;
    lo &= msk1;
    sum -= lo >> 1;
    lo &= msk2;
    sum -= lo >> 2;
    lo &= msk3;
    sum -= lo >> 3;

    /* Upper half, accumulated into the same nibbles (at most 8 each). */
    sum += hi;
    hi &= msk1;
    sum -= hi >> 1;
    hi &= msk2;
    sum -= hi >> 2;
    hi &= msk3;
    sum -= hi >> 3;

    /* Fold nibbles into bytes, bytes into 16-bit halves, then into one value. */
    sum = ((sum >> 4) & msk4) + (sum & msk4);
    sum = (sum >> 16) + (sum & 0xFFFF);
    sum = (sum >> 8) + (sum & 0xFF);
    return sum;
}
; ----------------------------------------------------------------------
; @tI_Scharnagl@8 : compiler listing of the Scharnagl bit-count routine
; whose C source (lines 292-307) is quoted in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); on entry the low dword
;       is at [esp+4] (_b$[esp-4]) and the high dword at [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: ecx and edx are overwritten; esi is saved with push and restored
;       with pop before returning.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tI_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tI_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tI_Scharnagl@8 PROC NEAR ; COMDAT
; 292 : unsigned buf;
; 293 : unsigned acc;
; 294 :
; 295 : buf = (unsigned) b;
; 296 : acc = buf - ((buf & msk1) >> 1)
; 297 : - ((buf & msk2) >> 2)
; 298 : - ((buf & msk3) >> 3);
; Low half: ecx = buf, eax accumulates acc.
; Every term is emitted as shift-then-mask; (buf >> k) & (mask >> k) is
; the same value as the source's (buf & mask) >> k, and the terms are
; subtracted in the order >>3, >>2, >>1.
mov ecx, DWORD PTR _b$[esp-4]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
mov eax, ecx
sub eax, edx
mov edx, ecx
shr edx, 2
and edx, 858993459 ; 33333333H
sub eax, edx
shr ecx, 1
and ecx, 2004318071 ; 77777777H
sub eax, ecx
; 299 : buf = ((unsigned *) &b)[1]; /* Intel format */
; 300 : acc += buf - ((buf & msk1) >> 1)
; 301 : - ((buf & msk2) >> 2)
; 302 : - ((buf & msk3) >> 3);
; High half: ecx = buf again, esi collects this half's result.
; The esp-relative load is done before "push esi", so _b$[esp] still
; addresses [esp+8] at that point.
mov ecx, DWORD PTR _b$[esp]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
push esi
mov esi, ecx
sub esi, edx
mov edx, ecx
shr edx, 2
shr ecx, 1
and edx, 858993459 ; 33333333H
sub esi, edx
and ecx, 2004318071 ; 77777777H
sub esi, ecx
; acc (eax) += high-half result (esi)
add eax, esi
; 303 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; ecx = sum of the low and high nibble of every byte of acc
mov ecx, eax
shr ecx, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add ecx, eax
; 304 : acc = (acc & 0xFFFF) + (acc >> 16);
; edx = upper 16 bits + lower 16 bits of the previous value
mov edx, ecx
shr edx, 16 ; 00000010H
and ecx, 65535 ; 0000ffffH
add edx, ecx
; 305 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value; esi is restored once it is no longer needed
mov eax, edx
and eax, 255 ; 000000ffH
shr edx, 8
add eax, edx
pop esi
; 306 : return acc;
; 307 : }
ret 8
@tI_Scharnagl@8 ENDP
_TEXT ENDS
; ----------------------------------------------------------------------
; @tJ_Scharnagl@8 : compiler listing of the Scharnagl bit-count routine
; whose C source (lines 311-326) is quoted in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); on entry the low dword
;       is at [esp+4] (_b$[esp-4]) and the high dword at [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: ecx and edx are overwritten; esi is saved with push and restored
;       with pop before returning.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tJ_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tJ_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tJ_Scharnagl@8 PROC NEAR ; COMDAT
; 311 : unsigned buf;
; 312 : unsigned acc;
; 313 :
; 314 : buf = (unsigned)b;
; 315 : acc = buf - ((buf & msk1)>>1)
; 316 : - ((buf & msk2)>>2)
; 317 : - ((buf & msk3)>>3);
; Low half: ecx = buf, eax accumulates acc.
; Every term is emitted as shift-then-mask; (buf >> k) & (mask >> k) is
; the same value as the source's (buf & mask) >> k, and the terms are
; subtracted in the order >>3, >>2, >>1.
mov ecx, DWORD PTR _b$[esp-4]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
mov eax, ecx
sub eax, edx
mov edx, ecx
shr edx, 2
and edx, 858993459 ; 33333333H
sub eax, edx
shr ecx, 1
and ecx, 2004318071 ; 77777777H
sub eax, ecx
; 318 : buf = ((unsigned *)&b)[1]; /* Intel format */
; 319 : acc += buf - ((buf & msk1)>>1)
; 320 : - ((buf & msk2)>>2)
; 321 : - ((buf & msk3)>>3);
; High half: ecx = buf again, esi collects this half's result.
; The esp-relative load is done before "push esi", so _b$[esp] still
; addresses [esp+8] at that point.
mov ecx, DWORD PTR _b$[esp]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
push esi
mov esi, ecx
sub esi, edx
mov edx, ecx
shr edx, 2
shr ecx, 1
and edx, 858993459 ; 33333333H
sub esi, edx
and ecx, 2004318071 ; 77777777H
sub esi, ecx
; acc (eax) += high-half result (esi)
add eax, esi
; 322 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; ecx = sum of the low and high nibble of every byte of acc
mov ecx, eax
shr ecx, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add ecx, eax
; 323 : acc = (acc & 0xFFFF) + (acc >> 16);
; edx = upper 16 bits + lower 16 bits of the previous value
mov edx, ecx
shr edx, 16 ; 00000010H
and ecx, 65535 ; 0000ffffH
add edx, ecx
; 324 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value; esi is restored once it is no longer needed
mov eax, edx
and eax, 255 ; 000000ffH
shr edx, 8
add eax, edx
pop esi
; 325 : return acc;
; 326 : }
ret 8
@tJ_Scharnagl@8 ENDP
_TEXT ENDS
; ----------------------------------------------------------------------
; @tK_Scharnagl@8 : compiler listing of the "endian independent" form of
; the Scharnagl bit-count routine; its C source (lines 333-350) is quoted
; in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); the low dword is read
;       from [esp+4] (_b$[esp-4]) and the high dword from [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: only eax, ecx and edx are written; nothing is pushed, so the
;       esp-relative offsets stay fixed for the whole routine.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tK_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tK_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tK_Scharnagl@8 PROC NEAR ; COMDAT
; 333 : unsigned buf;
; 334 : unsigned acc;
; 335 :
; 336 : buf = (unsigned)b;
; 337 : acc = buf;
; 338 : acc -= ((buf &= msk1)>>1);
; Low half: ecx = acc, eax = buf (masked in place), edx = scratch copy
; of buf taken before the next mask is applied.
mov ecx, DWORD PTR _b$[esp-4]
mov eax, ecx
and eax, -286331154 ; eeeeeeeeH
mov edx, eax
; 339 : acc -= ((buf &= msk2)>>2);
; The compiler interleaves statements: buf is masked for line 339 first,
; then the line-338 term in edx is shifted and subtracted.
and eax, -858993460 ; ccccccccH
shr edx, 1
sub ecx, edx
; 340 : acc -= ((buf &= msk3)>>3);
; edx = (buf >> 3) & 11111111H, the line-340 term with the mask applied
; after the shift; eax = buf >> 2, the line-339 term.  Both terms are
; added together and subtracted from acc in one step further down.
mov edx, eax
shr eax, 2
shr edx, 3
and edx, 286331153 ; 11111111H
add edx, eax
; 341 : buf = (unsigned)(b>>32);
; eax = high dword of b; the pending subtraction for the low half
; follows the load.
mov eax, DWORD PTR _b$[esp]
sub ecx, edx
; 342 : acc += buf;
add ecx, eax
; 343 : acc -= ((buf &= msk1)>>1);
; High half: same instruction pattern as for the low half above.
and eax, -286331154 ; eeeeeeeeH
mov edx, eax
; 344 : acc -= ((buf &= msk2)>>2);
and eax, -858993460 ; ccccccccH
shr edx, 1
sub ecx, edx
; 345 : acc -= ((buf &= msk3)>>3);
mov edx, eax
shr eax, 2
shr edx, 3
and edx, 286331153 ; 11111111H
add edx, eax
sub ecx, edx
; 346 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; eax = sum of the low and high nibble of every byte of acc
mov eax, ecx
shr eax, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add eax, ecx
; 347 : acc = (acc & 0xFFFF) + (acc >> 16);
; ecx = upper 16 bits + lower 16 bits of the previous value
mov ecx, eax
and eax, 65535 ; 0000ffffH
shr ecx, 16 ; 00000010H
add ecx, eax
; 348 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value
mov eax, ecx
and eax, 255 ; 000000ffH
shr ecx, 8
add eax, ecx
; 349 : return acc;
; 350 : }
ret 8
@tK_Scharnagl@8 ENDP
_TEXT ENDS
Reinhard Scharnagl wrote: P.S.: Have you updated the format of the constants? Does it matter?
; ----------------------------------------------------------------------
; @tI_Scharnagl@8 : compiler listing of the Scharnagl bit-count routine
; whose C source (lines 292-307) is quoted in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); on entry the low dword
;       is at [esp+4] (_b$[esp-4]) and the high dword at [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: ecx and edx are overwritten; esi is saved with push and restored
;       with pop before returning.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tI_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tI_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tI_Scharnagl@8 PROC NEAR ; COMDAT
; 292 : unsigned buf;
; 293 : unsigned acc;
; 294 :
; 295 : buf = (unsigned) b;
; 296 : acc = buf - ((buf & msk1) >> 1)
; 297 : - ((buf & msk2) >> 2)
; 298 : - ((buf & msk3) >> 3);
; Low half: ecx = buf, eax accumulates acc.
; Every term is emitted as shift-then-mask; (buf >> k) & (mask >> k) is
; the same value as the source's (buf & mask) >> k, and the terms are
; subtracted in the order >>3, >>2, >>1.
mov ecx, DWORD PTR _b$[esp-4]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
mov eax, ecx
sub eax, edx
mov edx, ecx
shr edx, 2
and edx, 858993459 ; 33333333H
sub eax, edx
shr ecx, 1
and ecx, 2004318071 ; 77777777H
sub eax, ecx
; 299 : buf = ((unsigned *) &b)[1]; /* Intel format */
; 300 : acc += buf - ((buf & msk1) >> 1)
; 301 : - ((buf & msk2) >> 2)
; 302 : - ((buf & msk3) >> 3);
; High half: ecx = buf again, esi collects this half's result.
; The esp-relative load is done before "push esi", so _b$[esp] still
; addresses [esp+8] at that point.
mov ecx, DWORD PTR _b$[esp]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
push esi
mov esi, ecx
sub esi, edx
mov edx, ecx
shr edx, 2
shr ecx, 1
and edx, 858993459 ; 33333333H
sub esi, edx
and ecx, 2004318071 ; 77777777H
sub esi, ecx
; acc (eax) += high-half result (esi)
add eax, esi
; 303 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; ecx = sum of the low and high nibble of every byte of acc
mov ecx, eax
shr ecx, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add ecx, eax
; 304 : acc = (acc & 0xFFFF) + (acc >> 16);
; edx = upper 16 bits + lower 16 bits of the previous value
mov edx, ecx
shr edx, 16 ; 00000010H
and ecx, 65535 ; 0000ffffH
add edx, ecx
; 305 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value; esi is restored once it is no longer needed
mov eax, edx
and eax, 255 ; 000000ffH
shr edx, 8
add eax, edx
pop esi
; 306 : return acc;
; 307 : }
ret 8
@tI_Scharnagl@8 ENDP
_TEXT ENDS
; ----------------------------------------------------------------------
; @tJ_Scharnagl@8 : compiler listing of the Scharnagl bit-count routine
; whose C source (lines 311-326) is quoted in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); on entry the low dword
;       is at [esp+4] (_b$[esp-4]) and the high dword at [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: ecx and edx are overwritten; esi is saved with push and restored
;       with pop before returning.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tJ_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tJ_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tJ_Scharnagl@8 PROC NEAR ; COMDAT
; 311 : unsigned buf;
; 312 : unsigned acc;
; 313 :
; 314 : buf = (unsigned)b;
; 315 : acc = buf - ((buf & msk1)>>1)
; 316 : - ((buf & msk2)>>2)
; 317 : - ((buf & msk3)>>3);
; Low half: ecx = buf, eax accumulates acc.
; Every term is emitted as shift-then-mask; (buf >> k) & (mask >> k) is
; the same value as the source's (buf & mask) >> k, and the terms are
; subtracted in the order >>3, >>2, >>1.
mov ecx, DWORD PTR _b$[esp-4]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
mov eax, ecx
sub eax, edx
mov edx, ecx
shr edx, 2
and edx, 858993459 ; 33333333H
sub eax, edx
shr ecx, 1
and ecx, 2004318071 ; 77777777H
sub eax, ecx
; 318 : buf = ((unsigned *)&b)[1]; /* Intel format */
; 319 : acc += buf - ((buf & msk1)>>1)
; 320 : - ((buf & msk2)>>2)
; 321 : - ((buf & msk3)>>3);
; High half: ecx = buf again, esi collects this half's result.
; The esp-relative load is done before "push esi", so _b$[esp] still
; addresses [esp+8] at that point.
mov ecx, DWORD PTR _b$[esp]
mov edx, ecx
shr edx, 3
and edx, 286331153 ; 11111111H
push esi
mov esi, ecx
sub esi, edx
mov edx, ecx
shr edx, 2
shr ecx, 1
and edx, 858993459 ; 33333333H
sub esi, edx
and ecx, 2004318071 ; 77777777H
sub esi, ecx
; acc (eax) += high-half result (esi)
add eax, esi
; 322 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; ecx = sum of the low and high nibble of every byte of acc
mov ecx, eax
shr ecx, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add ecx, eax
; 323 : acc = (acc & 0xFFFF) + (acc >> 16);
; edx = upper 16 bits + lower 16 bits of the previous value
mov edx, ecx
shr edx, 16 ; 00000010H
and ecx, 65535 ; 0000ffffH
add edx, ecx
; 324 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value; esi is restored once it is no longer needed
mov eax, edx
and eax, 255 ; 000000ffH
shr edx, 8
add eax, edx
pop esi
; 325 : return acc;
; 326 : }
ret 8
@tJ_Scharnagl@8 ENDP
_TEXT ENDS
; ----------------------------------------------------------------------
; @tK_Scharnagl@8 : compiler listing of the "endian independent" form of
; the Scharnagl bit-count routine; its C source (lines 333-350) is quoted
; in the "; NNN :" comments.
; In:   8-byte argument b on the stack (_b$ = 8); the low dword is read
;       from [esp+4] (_b$[esp-4]) and the high dword from [esp+8] (_b$[esp]).
; Out:  eax = acc, the value the C code returns.
; Regs: only eax, ecx and edx are written; nothing is pushed, so the
;       esp-relative offsets stay fixed for the whole routine.
; Exit: "ret 8" also removes the 8 argument bytes from the stack.
; ----------------------------------------------------------------------
PUBLIC @tK_Scharnagl@8
; Function compile flags: /Ogty
; COMDAT @tK_Scharnagl@8
_TEXT SEGMENT
_b$ = 8 ; size = 8
@tK_Scharnagl@8 PROC NEAR ; COMDAT
; 333 : unsigned buf;
; 334 : unsigned acc;
; 335 :
; 336 : buf = (unsigned)b;
; 337 : acc = buf;
; 338 : acc -= ((buf &= msk1)>>1);
; Low half: ecx = acc, eax = buf (masked in place), edx = scratch copy
; of buf taken before the next mask is applied.
mov ecx, DWORD PTR _b$[esp-4]
mov eax, ecx
and eax, -286331154 ; eeeeeeeeH
mov edx, eax
; 339 : acc -= ((buf &= msk2)>>2);
; The compiler interleaves statements: buf is masked for line 339 first,
; then the line-338 term in edx is shifted and subtracted.
and eax, -858993460 ; ccccccccH
shr edx, 1
sub ecx, edx
; 340 : acc -= ((buf &= msk3)>>3);
; edx = (buf >> 3) & 11111111H, the line-340 term with the mask applied
; after the shift; eax = buf >> 2, the line-339 term.  Both terms are
; added together and subtracted from acc in one step further down.
mov edx, eax
shr eax, 2
shr edx, 3
and edx, 286331153 ; 11111111H
add edx, eax
; 341 : buf = (unsigned)(b>>32);
; eax = high dword of b; the pending subtraction for the low half
; follows the load.
mov eax, DWORD PTR _b$[esp]
sub ecx, edx
; 342 : acc += buf;
add ecx, eax
; 343 : acc -= ((buf &= msk1)>>1);
; High half: same instruction pattern as for the low half above.
and eax, -286331154 ; eeeeeeeeH
mov edx, eax
; 344 : acc -= ((buf &= msk2)>>2);
and eax, -858993460 ; ccccccccH
shr edx, 1
sub ecx, edx
; 345 : acc -= ((buf &= msk3)>>3);
mov edx, eax
shr eax, 2
shr edx, 3
and edx, 286331153 ; 11111111H
add edx, eax
sub ecx, edx
; 346 : acc = (acc & msk4) + ((acc >> 4) & msk4);
; eax = sum of the low and high nibble of every byte of acc
mov eax, ecx
shr eax, 4
and eax, 252645135 ; 0f0f0f0fH
and ecx, 252645135 ; 0f0f0f0fH
add eax, ecx
; 347 : acc = (acc & 0xFFFF) + (acc >> 16);
; ecx = upper 16 bits + lower 16 bits of the previous value
mov ecx, eax
and eax, 65535 ; 0000ffffH
shr ecx, 16 ; 00000010H
add ecx, eax
; 348 : acc = (acc & 0xFF) + (acc >> 8);
; eax = final value
mov eax, ecx
and eax, 255 ; 000000ffH
shr ecx, 8
add eax, ecx
; 349 : return acc;
; 350 : }
ret 8
@tK_Scharnagl@8 ENDP
_TEXT ENDS
Return to Programming and Technical Discussions
Users browsing this forum: No registered users and 24 guests