Snoobing a subset of a set

Programming Topics (Computer Chess) and technical aspects as test techniques, book building, program tuning etc

Moderator: Andres Valverde

Snoobing a subset of a set

Postby Gerd Isenberg » 03 Feb 2007, 20:08

Hi,

combining a ripple adder with the snoob ("same number of one bits") algorithm might be usefull to find magics faster ...

How it works:
Code: Select all
set:
... 1110 0110 0x..e6
eg. all subsets with two bits sets:
... 0000 0110 0x..06
... 0010 0010 0x..22
... 0010 0100 0x..24
... 0100 0010 0x..42
... 0100 0100 0x..44
... 0110 0000 0x..60
... 1000 0010 0x..82
... 1000 0100 0x..84
... 1010 0000 0x..a0
... 1100 0000 0x..c0

If, after a ripple add of the least significat one-bit, the result has only two changed bits - it has already the same population count, and we are ready (01->10). Otherwise with > 2 changed bits (011->100, 0111->1000,...), we need to set changed-2 lower bits from the original set into the result (rip).

In the original snoob with all bits one in the set, this works loopless by:
Code: Select all
 rip = sub + (sub & -sub);
 rip |= ((sub^rip) >> 2) >> bitScan(sub)

With arbitrary and even rare populated sets, i see no other way to implement a loop similar to Kerningham's population count, based on reset least significant one-bit by set&=set-1. If somebody is aware of a loopless version, let me know!
Code: Select all
// get next greater subset of set with same number of one bits
u64 snoob (u64 sub, u64 set) {
   u64 rip = (sub + (sub & -sub) - set - 1) & set;
   for (sub^=rip, sub&=sub-1; sub&=sub-1; set&=set-1)
      rip += set & -set;
   return rip;
}

x64 assembly by vc2005:
Code: Select all
_TEXT   SEGMENT
sub$ = 8
set$ = 16
?snoob@@YA_K_K0@Z PROC
  00000   48 8b c1    mov    rax, rcx
  00003   4c 8b c1    mov    r8, rcx
  00006   48 f7 d8    neg    rax
  00009   48 23 c1    and    rax, rcx
  0000c   48 2b c2    sub    rax, rdx
  0000f   4c 8d 4c 08 ff    lea    r9, QWORD PTR [rax+rcx-1]
  00014   4c 23 ca    and    r9, rdx
  00017   4d 33 c1    xor    r8, r9
  0001a   49 8d 40 ff    lea    rax, QWORD PTR [r8-1]
  0001e   4c 23 c0    and    r8, rax
  00021   49 8d 40 ff    lea    rax, QWORD PTR [r8-1]
  00025   4c 23 c0    and    r8, rax
  00028   74 22       je    SHORT $LN8@snoob
  0002a   66 66 90 66 66
   90       npad    6
$LL3@snoob:
  00030   48 8b ca    mov    rcx, rdx
  00033   48 f7 d9    neg    rcx
  00036   48 23 ca    and    rcx, rdx
  00039   4c 03 c9    add    r9, rcx
  0003c   48 8d 4a ff    lea    rcx, QWORD PTR [rdx-1]
  00040   48 23 d1    and    rdx, rcx
  00043   49 8d 48 ff    lea    rcx, QWORD PTR [r8-1]
  00047   4c 23 c1    and    r8, rcx
  0004a   75 e4       jne    SHORT $LL3@snoob
$LN8@snoob:
  0004c   49 8b c1    mov    rax, r9
  0004f   c3       ret    0
?snoob@@YA_K_K0@Z ENDP
_TEXT   ENDS


[edit]
Ok, this one looks much better ipc-wise, taking advantage of expressions already calculated to reset ls1b in rip. Same for the loop body - shorter and less dependent code.

Code: Select all
// get next greater subset of set with same number of one bits
u64 snoob (u64 sub, u64 set) {
   u64 rip =(sub+(sub&-sub)-set-1)&set, lsb;
   for (sub^=rip^(sub&-sub); sub&=sub-1; rip+=lsb, set^=lsb)
      lsb = set&-set;
   return rip;
}

Code: Select all
_TEXT   SEGMENT
sub$ = 8
set$ = 16
?snoob@@YA_K_K0@Z PROC
  00000   48 8b c1    mov    rax, rcx
  00003   4c 8d 41 ff    lea    r8, QWORD PTR [rcx-1]
  00007   48 f7 d8    neg    rax
  0000a   4c 23 c1    and    r8, rcx
  0000d   48 23 c1    and    rax, rcx
  00010   48 2b c2    sub    rax, rdx
  00013   4c 8d 4c 08 ff    lea    r9, QWORD PTR [rax+rcx-1]
  00018   4c 23 ca    and    r9, rdx
  0001b   4d 33 c1    xor    r8, r9
  0001e   49 8d 40 ff    lea    rax, QWORD PTR [r8-1]
  00022   4c 23 c0    and    r8, rax
  00025   74 21       je    SHORT $LN8@snoob
  00027             npad    9
$LL3@snoob:
  00030   48 8b ca    mov    rcx, rdx
  00033   48 f7 d9    neg    rcx
  00036   48 23 ca    and    rcx, rdx
  00039   4c 03 c9    add    r9, rcx
  0003c   48 33 d1    xor    rdx, rcx
  0003f   49 8d 48 ff    lea    rcx, QWORD PTR [r8-1]
  00043   4c 23 c1    and    r8, rcx
  00046   75 e8       jne    SHORT $LL3@snoob
$LN8@snoob:
  00048   49 8b c1    mov    rax, r9
  0004b   c3       ret    0
?snoob@@YA_K_K0@Z ENDP
_TEXT   ENDS

[/edit]

Cheers,
Gerd
Gerd Isenberg
 
Posts: 285
Joined: 31 Jan 2005, 20:31
Location: Hattingen, Germany

Re: Snoobing a subset of a set

Postby Gerd Isenberg » 04 Feb 2007, 15:42

Finally optimized 64-bit snoob and setwise snoob:

Code: Select all
#pragma intrinsic(_BitScanForward64)

// get next greater value with same number of one bits
u64 snoob (u64 sub) {
   unsigned long  r;  u64  rip;
   _BitScanForward64 (&r, sub);
   rip = sub + (sub&-(i64)sub);
   rip|=( (sub^rip) >>2 ) >> r;
   return rip;
}

Code: Select all
_TEXT   SEGMENT
sub$ = 8
?snoob@@YA_K_K@Z PROC
  00000   4c 8b c1    mov    r8, rcx
  00003   48 0f bc d1    bsf    rdx, rcx
  00007   49 f7 d8    neg    r8
  0000a   4c 23 c1    and    r8, rcx
  0000d   4c 03 c1    add    r8, rcx
  00010   49 8b c0    mov    rax, r8
  00013   48 33 c1    xor    rax, rcx
  00016   8b ca       mov    ecx, edx
  00018   48 c1 e8 02    shr    rax, 2
  0001c   48 d3 e8    shr    rax, cl
  0001f   49 0b c0    or    rax, r8
  00022   c3       ret    0
?snoob@@YA_K_K@Z ENDP
_TEXT   ENDS


Code: Select all
// get next greater subset of set with same number of one bits
u64 snoob (u64 sub, u64 set) {
   u64 tmp=sub-1, rip=set&(tmp+(-(i64)sub&sub)-set);
   for(sub=(tmp&sub)^rip;sub&=sub-1;rip^=tmp,set^=tmp)
       tmp=set&-(i64)set;
   return rip;
}
Code: Select all
_TEXT   SEGMENT
sub$ = 8
set$ = 16
?snoob@@YA_K_K0@Z PROC
  00000   4c 8d 49 ff    lea    r9, QWORD PTR [rcx-1]
  00004   4c 8b c1    mov    r8, rcx
  00007   49 f7 d8    neg    r8
  0000a   4c 23 c1    and    r8, rcx
  0000d   4c 2b c2    sub    r8, rdx
  00010   4d 03 c1    add    r8, r9
  00013   4c 23 c9    and    r9, rcx
  00016   4c 23 c2    and    r8, rdx
  00019   4d 33 c8    xor    r9, r8
  0001c   49 8d 41 ff    lea    rax, QWORD PTR [r9-1]
  00020   4c 23 c8    and    r9, rax
  00023   74 18       je    SHORT $LN1@snoob
$LL3@snoob:
  00025   48 8b ca    mov    rcx, rdx
  00028   48 f7 d9    neg    rcx
  0002b   48 23 ca    and    rcx, rdx
  0002e   4c 33 c1    xor    r8, rcx
  00031   48 33 d1    xor    rdx, rcx
  00034   49 8d 49 ff    lea    rcx, QWORD PTR [r9-1]
  00038   4c 23 c9    and    r9, rcx
  0003b   75 e8       jne    SHORT $LL3@snoob
$LN1@snoob:
  0003d   49 8b c0    mov    rax, r8
  00040   c3       ret    0
?snoob@@YA_K_K0@Z ENDP
Gerd Isenberg
 
Posts: 285
Joined: 31 Jan 2005, 20:31
Location: Hattingen, Germany

Re: Snoobing a subset of a set

Postby Gerd Isenberg » 07 Feb 2007, 22:11

The next less value - or subset of a set - with same number of one bits would look like this:

Code: Select all
// get next less set of a subset with same number of one bits
u64 rSnoob (u64 sub, u64 set) {
   return ~snoob(~sub & set, set) & set;
}


Code: Select all
// get next less value with same number of one bits
u64 rSnoob (u64 sub) {
   return ~snoob(~sub);
}

or to save some bitscans with the cost of a conditional branch:
Code: Select all
// get next less value with same number of one bits
u64 rSnoob (u64 sub) {
   if ( sub & 1 )
      return ~snoob(~sub);
   u64 lsb = sub & -(i64)sub;
   return sub ^ lsb ^ (lsb>>1);
}
Gerd Isenberg
 
Posts: 285
Joined: 31 Jan 2005, 20:31
Location: Hattingen, Germany


Return to Programming and Technical Discussions

Who is online

Users browsing this forum: No registered users and 50 guests