Since my old approach with bitboard move generator used pretty much memory (more than 2Mbyte), with possible cache pollution as a result, I have now made another one that uses ~10kbytes and should pollute the cache much less.
- Code: Select all
TBitBoard MaskFile[8]; 64
TBitBoard MaskRank[8]; 64
TBitBoard MaskH8A1[15]; 120
TBitBoard MaskH1A8[15]; 120
U8 DiagH8A1Shift[15]; 15
U8 DiagH1A8Shift_R[15]; 15
U8 DiagH1A8Shift_L[15]; 15
U8 RookAttacksFile[8][64]; 512
U64 HashKeyRank[8]; 64
U64 HashKeyH8A1[8]; 64
U64 HashKeyH1A8[8]; 64
TBitBoard RookAttacksRank[8][32]; 2048
TBitBoard BishopAttacksH8A1[8][32]; 2048
TBitBoard BishopAttacksH1A8[8][32]; 2048
U8 RayH8A1[64]; 64
U8 RayH1A8[64]; 64
U8 RayH8A1Pos[64]; 64
U8 RayH1A8Pos[64]; 64
TBitBoard KingAttacks[64]; 512
TBitBoard KnightAttacks[64]; 512
TBitBoard WPawnAttacks[64]; 512
TBitBoard BPawnAttacks[64]; 512
Total: 9565 bytes
As one can see, it is still a non-rotated approach using hash-keys. Here are some of my results with perft and quasi-legal moves:
- Code: Select all
pos 0:"rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w QKqk -"
pos 1:"r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w QKqk -"
pos 2:"q3k3/6q1/4q3/2q5/5Q2/3Q4/1Q6/3K3Q w - -"
New Old Diff
pos 0 d7: 72.3MNps 90.1MNps 24.6% N=3347724941
pos 1 d6: 74.8MNps 100.8MNps 34.8% N=8427118359
Pos 2 d5: 124.7MNps 137.3MNps 10.1% N=2000148357
So, its still pretty fast, but legality check is more expensive. Here is the code for Queens:
- Code: Select all
Pcs=WQueens;
while (Pcs) {
From=FirstOne2(C=Pcs&-Pcs);
File = From>>3;
Rank = From&7;
// File Attacks
Pattern = ((AllWPieces|AllBPieces)&MaskFile[File])>>((File<<3)+1);
A = (U64)RookAttacksFile[Rank][Pattern]<<(File<<3);
// Rank Attacks
Pattern = ((AllWPieces | AllBPieces) & MaskRank[Rank])>>Rank;
Address = (Pattern*HashKeyRank[File])>>(64-RAT_RANK_BITS);
A |= RookAttacksRank[File][Address]<<Rank;
// H8A1 Attacks
Diag = RayH8A1[From];
RayPos = RayH8A1Pos[From];
Pattern=((AllWPieces|AllBPieces)&MaskH8A1[Diag]&~EDGES)
>>DiagH8A1Shift[Diag];
Address=(Pattern*HashKeyH8A1[RayPos])>>(64-BAT_H8A1_BITS);
A |= (BishopAttacksH8A1[RayPos][Address]<<DiagH8A1Shift[Diag])
&MaskH8A1[Diag];
// H1A8 Attacks
Diag = RayH1A8[From];
RayPos = RayH1A8Pos[From];
Pattern=(((AllWPieces|AllBPieces)&MaskH1A8[Diag]&~EDGES)
>>DiagH1A8Shift_R[Diag])<<DiagH1A8Shift_L[Diag];
Address=(Pattern*HashKeyH1A8[RayPos])>>(64-BAT_H1A8_BITS);
Pattern = A |= ((BishopAttacksH1A8[RayPos][Address]<<DiagH1A8Shift_R[Diag])
>>DiagH1A8Shift_L[Diag]) & MaskH1A8[Diag];
A &= AllBPieces;
if (A & Kings) return -1;
From = (From<<6) + (WQueen<<12);
while (A) {
To=FirstOne2(B=A&-A);
*M++=From+To+(Board[To]<<16);
A ^= B;
}
A=Pattern;
A ^= A & (AllWPieces|AllBPieces);
while (A) {
To=FirstOne2(B=A&-A);
*M++=From+To;
A ^= B;
}
Pcs ^= C;
}
For now I do both captures and non-captures in the same function, saving some overhead. They can easily be put in separate move lists.
I havent done mobility stuff yet, but this would be slower than crafty (except for files), because it would need hashing as well, and some 3-6k more mem usage.
Cheers
Lasse