Here's the bias-testing software ...
Hidden Text: Show
- Code: Select all
/* This program uses the patterns and counts in a user-supplied parameter file
* to score solution grids presented on stdin for bias
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mem.h>
#include <time.h>
#define NREP 36288
#define NREPX 22266
#define SWAP(A,B) { int t = A; A = B; B = t; }
#define NTOP 20
extern int isatty(int);
typedef unsigned int uint32;
typedef struct {
double score;
double bias;
int pat;
} patscore_t;
typedef int patct_t[416];
typedef char patstr_t[27];
static int b1rep[416][3][9] = {{{0}}}, b1rct[416] = {0};
static double b1p[416];
static int map416to44[416];
static int htable[6000];
static int rep[NREP][7];
static int colour[NREP];
static int nsoln[44] = { /* I happen to know that these are the solution counts */
/* for the gangsters generated by this particular code */
97961464, 97539392, 98369440, 97910032,
96482296, 97549160, 97287008, 97416016,
97477096, 96807424, 98119872, 98371664,
98128064, 98733568, 97455648, 97372400,
97116296, 95596592, 97346960, 97714592,
97992064, 98153104, 98733184, 98048704,
96702240, 98950072, 97685328, 98784768,
98493856, 100231616, 99525184, 96100688,
96631520, 97756224, 99083712, 98875264,
102047904, 101131392, 96380896, 102543168,
99258880, 94888576, 97282720, 108374976
};
/* ----------------------------------------------------------------------
* safe malloc() and realloc()
* ----------------------------------------------------------------------
*/
#define SAFE_MALLOC(M,S,T) { M = (T*)__safe_realloc(NULL,(S)*sizeof(T),__FILE__,__FUNCTION__,__LINE__); }
#define SAFE_REALLOC(M,S,T) { M = (T*)__safe_realloc((M),(S)*sizeof(T),__FILE__,__FUNCTION__,__LINE__); }
static void *__safe_realloc(void *mem, int sz, char *file, const char *function, int line) {
if (NULL == (mem=realloc(mem,sz))) {
fprintf(stderr,"%s:%s() line %d: failed to allocate %d ",file,function,line,sz);
perror("bytes");
exit(1);
}
return mem;
}
/* ----------------------------------------------------------------------
* miscellaneous utility functions
* ----------------------------------------------------------------------
*/
static int pack(int a, int b, int c) {
int packme[10]={0}, packed=0, i;
packme[a] = packme[b] = packme[c] = 1;
for (i=1; i<=9; i++) if (packme[i]) packed = (packed<<4) + i;
return packed;
}
static int qsrepcmp(const void *a, const void *b) {
return memcmp(a,b,6*sizeof(int));
}
static void reporder(int *rep) {
if (rep[0] > rep[1]) SWAP(rep[0],rep[1]);
if (rep[0] > rep[2]) SWAP(rep[0],rep[2]);
if (rep[1] > rep[2]) SWAP(rep[1],rep[2]);
if (rep[3] > rep[4]) SWAP(rep[3],rep[4]);
if (rep[3] > rep[5]) SWAP(rep[3],rep[5]);
if (rep[4] > rep[5]) SWAP(rep[4],rep[5]);
if (rep[0]*0x1000+rep[1] > rep[3]*0x1000+rep[4]) {
SWAP(rep[0],rep[3]);
SWAP(rep[1],rep[4]);
SWAP(rep[2],rep[5]);
}
return;
}
static void printrep(int *rep) {
printf("%3x %3x %3x %3x %3x %3x",rep[0],rep[1],rep[2],rep[3],rep[4],rep[5]);
return;
}
static void replistsort(int (*replist)[7], int nrep) {
int i, j;
qsort(replist,nrep,7*sizeof(int),qsrepcmp);
for (i=j=1; i<nrep; i++) {
if (qsrepcmp(replist+i-1,replist+i)) {
if (j < i) memcpy(replist+j,replist+i,7*sizeof(int));
j++;
} else {
replist[j-1][6] += replist[i][6];
}
}
return;
}
static int findrep(int *findme) {
int i0 = -1;
int i1 = NREPX;
while (i1 > i0+1) {
int imid = (i0+i1) / 2;
int res = memcmp(findme,rep+imid,6*sizeof(int));
if (res == 0) return imid;
else if (res < 0) i1 = imid;
else i0 = imid;
}
return -1;
}
/* ----------------------------------------------------------------------
* Get full representatives list of normalised boxes 1,2,3. Reps are
* stored as 6 ints s.t. rep[0]<rep[1]<rep[2], rep[3]<rep[4]<rep[5] and
* rep[0,1,2] < rep[3,4,5], plus one further int to count the number of
* normalised box 1,2,3s that map to this rep.
* ----------------------------------------------------------------------
*/
static void get_repx(int level) {
static int box[3][9];
static int ct;
int ii, jj, ok;
int i;
if (level == 0) for (i=ct=0; i<9; i++) box[i/3][i%3] = i + 1;
/* enforce row ordering
*/
if (level==2 && box[0][4]<box[0][3]) return;
if (level==3 && box[0][5]<box[0][4]) return;
if (level==4 && box[0][6]<box[0][3]) return;
if (level==5 && box[0][7]<box[0][6]) return;
if (level==6 && box[0][8]<box[0][7]) return;
/* store the representative when we've got normalised boxes 1,2,3
*/
if (level == 18) {
int tmp[6];
for (i=3; i<9; i++) tmp[i-3] = pack(box[0][i],box[1][i],box[2][i]);
reporder(tmp);
memcpy(rep+ct,tmp,6*sizeof(int));
rep[ct][6] = 1;
ct++;
return;
}
/* fall-through to recursive box-filling code */
ii = level / 6;
jj = 3 + level%6;
ok = 0x3fe;
for (i=0; i<jj; i++) ok &= ~(1<<box[ii][i]);
for (i=0; i<3*ii+jj%3; i++) ok &= ~(1<<box[i/3][i%3+3*(jj/3)]);
for (i=1; i<=9; i++) {
if (ok & (1<<i)) {
box[ii][jj] = i;
get_repx(level+1);
}
}
/* finish up by sorting & deduping the replist
*/
if (level == 0) replistsort(rep,NREP);
return;
}
/* ----------------------------------------------------------------------
* Find representatives equivalent to a given one and relabel all
* of these with the given "colour"
* ----------------------------------------------------------------------
*/
static void permute_gangster(int *rep, int true_colour) {
int box[3][9];
int perm[18] = { 0,1,2, 0,2,1, 1,0,2, 1,2,0, 2,0,1, 2,1,0 };
int ordbox, *colperm, *cellperm[3];
int map[10];
int i;
/* build basic box from representatives (but don't worry about
* digit order within columns)
*/
for (i=0; i<9; i++) box[i/3][i%3] = i + 1;
for (i=0; i<6; i++) {
box[0][i+3] = rep[i]>>8;
box[1][i+3] = 15 & (rep[i]>>4);
box[2][i+3] = 15 & rep[i];
}
/* find list of equivalent representatives
*/
for (ordbox=0; ordbox<3; ordbox++) {
for (colperm=perm; colperm<perm+18; colperm+=3) {
for (cellperm[0]=perm; cellperm[0]<perm+18; cellperm[0]+=3) {
for (cellperm[1]=perm; cellperm[1]<perm+18; cellperm[1]+=3) {
for (cellperm[2]=perm; cellperm[2]<perm+18; cellperm[2]+=3) {
int newbox[3][9];
int tmp[6];
/* relabel digits so that box {ordbox} has value 1+x+3*y
* in row {cellperm[x][y]} column {colperm[x]}
*/
for (i=0; i<9; i++) {
int x = i % 3;
int y = i / 3;
map[ box[ cellperm[x][y] ][ 3*ordbox+colperm[x] ] ] = i + 1;
}
for (i=0; i<27; i++)
newbox[i/9][i%9] = map[box[i/9][i%9]];
/* construct the representative for this relabelled arrangement
*/
for (i=0; i<6; i++) {
int col = 3*((ordbox+1+(i/3))%3) + i%3;
tmp[i] = pack(newbox[0][col],newbox[1][col],newbox[2][col]);
}
reporder(tmp);
/* this new rep may not correspond to any normalised box123;
* but if it does then we should relabel that matching rep
*/
if ( (i=findrep(tmp)) >= 0) colour[i] = true_colour;
}
}
}
}
}
return;
}
/* ----------------------------------------------------------------------
* get44() calculates the "gang of 44" permutation-equivalent
* band 1 representatives
* ----------------------------------------------------------------------
*/
static int get44(void) {
int xrep[44][7];
double solsum;
int bandtot, unavtot;
int classnum;
int i, j;
get_repx(0);
bandtot = unavtot = 0;
for (i=0; i<NREPX; i++) colour[i] = i;
solsum = 0.0;
for (i=classnum=0; i<NREPX; i++) {
int s6;
if (colour[i] < i) continue;
printf("%3d : ",++classnum);
printrep(rep[i]);
permute_gangster(rep[i],i);
memcpy(xrep[classnum-1],rep[i],7*sizeof(int));
for (j=i,s6=0; j<NREPX; j++) if (colour[j] == i) s6 += rep[j][6];
printf(" : %4d : ",s6);
printf("%9d\n",nsoln[classnum-1]);
solsum += s6 * 1.0 * nsoln[classnum-1];
}
printf("CHECK: solsum*72^2*9! = %f (should be 6.67e21)\n\n",solsum*72*72*362880);
for (i=0; i<classnum; i++) memcpy(rep[i],xrep[i],7*sizeof(int));
return 0;
}
/* --------------------------------------------------------------
* canonicalise a band
* --------------------------------------------------------------
*/
static void can_band(int (*band)[9], int (*savecg)[9]) {
int perm[18] = { 0,1,2, 0,2,1, 1,0,2, 1,2,0, 2,0,1, 2,1,0 };
int firstbox, *rp, *cp;
int i, j;
savecg[0][0] = 999;
for (firstbox=0; firstbox<3; firstbox++) {
for (rp=&perm[0]; rp<&perm[18]; rp+=3) {
for (cp=&perm[0]; cp<&perm[18]; cp+=3) {
int cg[3][9], strip[9], dmap[10];
int a, b, c;
for (i=0; i<3; i++) for (j=0; j<3; j++) cg[i][j] = dmap[band[rp[i]][3*firstbox+cp[j]]] = 3*i+j+1;
for (i=a=0; i<9; i++) if ( (strip[i]=dmap[band[rp[0]][i]]) == 4 ) a = i;
b = 3*(a/3) + (a+1)%3; c = 3*(a/3) + (a+2)%3; if (strip[b] > strip[c]) { i=b; b=c; c=i; }
for (i=0; i<3; i++) {
cg[i][3] = dmap[band[rp[i]][a]];
cg[i][4] = dmap[band[rp[i]][b]];
cg[i][5] = dmap[band[rp[i]][c]];
}
a = 3*(3-(a/3)-firstbox); b = a+1; c = a+2;
if (strip[a]<strip[b] && strip[b]<strip[c]) {
/* OK */
} else if (strip[a]<strip[c] && strip[c]<strip[b]) {
i = c; c = b; b = i;
} else if (strip[b]<strip[a] && strip[a]<strip[c]) {
i = a; a = b; b = i;
} else if (strip[b]<strip[c] && strip[c]<strip[a]) {
i = a; a = b; b = c; c = i;
} else if (strip[c]<strip[a] && strip[a]<strip[b]) {
i = a; a = c; c = b; b = i;
} else if (strip[c]<strip[b] && strip[b]<strip[a]) {
i = a; a = c; c = i;
}
for (i=0; i<3; i++) {
cg[i][6] = dmap[band[rp[i]][a]];
cg[i][7] = dmap[band[rp[i]][b]];
cg[i][8] = dmap[band[rp[i]][c]];
}
if (memcmp(cg,savecg,27*sizeof(int)) < 0) memcpy(savecg,cg,27*sizeof(int));
}
}
}
return;
}
/* --------------------------------------------------------------
* cband hash : calculate the hash of a canonicalised band
* (the caller must canonicalise it first)
* --------------------------------------------------------------
*/
static int cband_hash(int (*band)[9]) {
int h, j;
for (j=h=1; j<18; j++) h = (10*(h+j*band[j/6][3+j%6])) % 5669;
return h;
}
/* --------------------------------------------------------------
* band_index : calculate the index (0 to 415) of a band
* --------------------------------------------------------------
*/
static int band_index(int (*grid)[9], int bandnr) {
int band[3][9], cg[3][9];
int h, i, j;
for (i=0; i<3; i++) for (j=0; j<9; j++) band[i][j] = grid[3*(bandnr-1)+i][j];
can_band(band,cg);
for (j=h=1; j<18; j++) h = (10*(h+j*cg[j/6][3+j%6])) % 5669;
return htable[h];
}
/* --------------------------------------------------------------
* stack_index : calculate the index (0 to 415) of a stack
* --------------------------------------------------------------
*/
static int stack_index(int (*grid)[9], int stacknr) {
int band[3][9], cg[3][9];
int h, i, j;
for (i=0; i<3; i++) for (j=0; j<9; j++) band[i][j] = grid[j][3*(stacknr-1)+i];
can_band(band,cg);
for (j=h=1; j<18; j++) h = (10*(h+j*cg[j/6][3+j%6])) % 5669;
return htable[h];
}
/* --------------------------------------------------------------
* get416() generates the list of 416 canonical band 1
* representatives
* --------------------------------------------------------------
*/
static void get416(void) {
int band[3][9] = {
{ 1,2,3, 0,0,0, 0,0,0 },
{ 4,5,6, 0,0,0, 0,0,0 },
{ 7,8,9, 0,0,0, 0,0,0 }
};
int rok[3] = { 0x3f0, 0x38e, 0x07e };
int bok[3] = { 0, 0x3fe, 0x3fe };
int level;
int ct;
for (level=ct=0; level>=0; ) {
int r = level / 6;
int c = 3 + level%6;
int b = c / 3;
int d;
if (level == 18) {
int cg[3][9];
int i;
can_band(band,cg);
for (i=0; i<416 && b1rep[i][0][0] && memcmp(b1rep[i],cg,27*sizeof(int)); i++) ;
if (i < 416) {
if (!b1rep[i][0][0]) {
memcpy(b1rep[i],cg,27*sizeof(int));
htable[cband_hash(cg)] = i;
}
b1rct[i]++;
} else {
fprintf(stderr,"DOOM: b1rep overflow at band nr %d\n",ct+1);
exit(1);
}
ct++;
level--;
continue;
}
if (level==2 && band[0][4]<band[0][3]) { level--; continue; }
if (level==3 && band[0][5]<band[0][4]) { level--; continue; }
if (level==4 && band[0][6]<band[0][3]) { level--; continue; }
if (level==5 && band[0][7]<band[0][6]) { level--; continue; }
if (level==6 && band[0][8]<band[0][7]) { level--; continue; }
d = band[r][c];
if (d) {
band[r][c] = 0;
rok[r] ^= 1<<d;
bok[b] ^= 1<<d;
}
for (d++; d<=9; d++) if ((1<<d)&rok[r]&bok[b]) break;
if (d <= 9) {
band[r][c] = d;
rok[r] ^= 1<<d;
bok[b] ^= 1<<d;
level++;
} else {
level--;
}
}
{
int i, j;
for (i=0; i<416; i++) {
printf("%3d: %3d * ",i+1,b1rct[i]);
for (j=0; j<27; j++) printf("%d",b1rep[i][j/9][j%9]);
printf("\n");
}
}
return;
}
/* --------------------------------------------------------------
* expand44to416(rep,c44) takes a representative from the c44'th
* member of the "gang of 44" and permutes minicols to get all
* bands (with box B1 fixed) that map to this. For each band, we
* look up its index, bi in 0 to 415, then set map416to44[bi] = c44
*
* Called for each "gangster" in turn, this generates the entire
* map from band indices (0 to 415) to gangsters (0 to 43)
* --------------------------------------------------------------
*/
static void expand44to416(int *rep, int c44) {
int perm[18] = { 0,1,2, 0,2,1, 1,0,2, 1,2,0, 2,0,1, 2,1,0 };
int rok[3] = { 0x3fe, 0x3fe, 0x3fe };
int band[3][9], good[3][9];
int *a, *b, *c, *d, *e, *f, *g, *h;
int i, j;
for (i=0; i<3; i++) {
for (j=0; j<1; j++) { good[i][j] = 3*i + j + 1; rok[i] ^= 1<<good[i][j]; }
band[0][1] = 2; band[0][2] = 3;
band[1][1] = 5; band[1][2] = 6;
band[2][1] = 8; band[2][2] = 9;
for (j=3; j<9; j++) band[i][j] = 15 & (rep[j-3]>>(4*i));
}
#define PLOOP(P,C) \
for (P=&perm[0]; P<&perm[18]; P+=3) { \
int m0 = 1 << (good[0][C]=band[P[0]][C]); \
int m1 = 1 << (good[1][C]=band[P[1]][C]); \
int m2 = 1 << (good[2][C]=band[P[2]][C]); \
if ( (rok[0]&m0) && (rok[1]&m1) && (rok[2]&m2) ) { \
rok[0] ^= m0; rok[1] ^= m1; rok[2] ^= m2;
#define END_PLOOP rok[0] ^= m0; rok[1] ^= m1; rok[2] ^= m2; } }
PLOOP(a,1) PLOOP(b,2) PLOOP(c,3) PLOOP(d,4) PLOOP(e,5) PLOOP(f,6) PLOOP(g,7) PLOOP(h,8)
{
int bi = band_index(good,1);
if (map416to44[bi]==-1 || map416to44[bi]==c44) map416to44[bi] = c44;
else { fprintf(stderr,"Mapping conflict!\n"); exit(1); }
}
END_PLOOP
END_PLOOP
END_PLOOP
END_PLOOP
END_PLOOP
END_PLOOP
END_PLOOP
END_PLOOP
#undef END_PLOOP
#undef PLOOP
return;
}
/* --------------------------------------------------------------
* PROGRAM START
* --------------------------------------------------------------
*/
int main(int argc, char **argv) {
patct_t *patcts;
patstr_t *patstrs;
double *mean, *sd;
int npats;
int i, j;
/* check args
*/
if (argc!=2 || isatty(0)) {
fprintf(stderr,"usage: %s pats_file <grids_file\n",argv[0]);
exit(1);
}
/* read patterns file
*/
{
FILE *fp = fopen(argv[1],"r");
char line[5000];
if (!fp) {
perror(argv[1]);
exit(1);
}
patcts = NULL;
patstrs = NULL;
mean = NULL;
sd = NULL;
npats = 0;
while (fgets(line,5000,fp)) {
char *s = line;
for (i=j=0; line[i]>=0x20; i++) if (line[i]==0x20) {
line[i] = 0;
if (++j == 2) {
if (strlen(s) != 27) {
fprintf(stderr,"bad pattern string on line %d: %s\n",npats+1,s);
exit(1);
}
if (!(npats&(npats+1))) {
SAFE_REALLOC(patcts,2*npats+1,patct_t);
SAFE_REALLOC(patstrs,2*npats+1,patstr_t);
SAFE_REALLOC(mean,2*npats+1,double);
SAFE_REALLOC(sd,2*npats+1,double);
}
{ int k; for (k=0; k<27; k++) patstrs[npats][k] = ( (s[k]=='0') ? '.' : s[k] ); }
} else if (3<=j && j<=417) {
patcts[npats][j-3] = atoi(s);
} else if (j == 418) {
fprintf(stderr,"too many pattern counts on line %d\n",npats+1);
exit(1);
}
s = &line[i+1];
}
line[i] = 0;
if (++j != 418) {
fprintf(stderr,"too few pattern counts on line %d\n",npats+1);
exit(1);
}
patcts[npats++][j-3] = atoi(s);
}
fclose(fp);
printf("got %d patterns\n",npats);
}
/* experimental: add single-band patterns
*/
{
int bct[416] = {0};
char bstr[50];
memset(bstr,0x20,50);
for (i=0; i<416; i++) {
bct[i] = 1;
sprintf(bstr," (band nr %d) ",i+1);
if (!(npats&(npats+1))) {
SAFE_REALLOC(patcts,2*npats+1,patct_t);
SAFE_REALLOC(patstrs,2*npats+1,patstr_t);
SAFE_REALLOC(mean,2*npats+1,double);
SAFE_REALLOC(sd,2*npats+1,double);
}
memcpy(patcts[npats],bct,416*sizeof(int));
memcpy(patstrs[npats],bstr,27);
npats++;
bct[i] = 0;
}
printf("added single-band patterns: now npats=%d\n",npats);
}
/* get "gangsters" and isomorphism class representatives, and work out
* the map that takes isomorphism class indices 0..415 to gangster
* class indices 0..43; also deduce the probability that a random grid
* will have band 1 in each isomorphism class 0..415; finally, work
* out the mean and (upper bound on) sd of each pattern
*/
get44();
get416();
for (i=0; i<416; i++) { map416to44[i] = -1; b1p[i] = 0.0; }
for (i=0; i<44; i++) expand44to416(rep[i],i);
{
double psum = 0.0;
for (i=0; i<416; i++) psum += b1p[i] += b1rct[i]*1.0*nsoln[map416to44[i]];
for (i=0; i<416; i++) b1p[i] /= psum;
for (i=0; i<416; i++) printf("iso class %3d : pr = 1 in %f\n",i,1/b1p[i]);
}
for (i=0; i<npats; i++) {
mean[i] = sd[i] = 0;
for (j=0; j<416; j++) mean[i] += b1p[j] * patcts[i][j];
for (j=0; j<416; j++) { double t = mean[i] - patcts[i][j]; sd[i] += b1p[j] * t * t; }
mean[i] *= 6;
sd[i] = 6 * sqrt(sd[i]); /* upper bound on the standard deviation */
}
/* read grids from stdin and test them
*/
{
int bandcount[416] = {0};
char line[5000];
int ng = 0;
while (fgets(line,5000,stdin)) {
int grid[9][9];
for (i=0; 0x31<=line[i] && line[i]<=0x39; i++) ((int*)grid)[i] = line[i] - 0x30;
if (i != 81) {
fprintf(stderr,"bad grid: %s\n",line);
exit(1);
}
ng++;
bandcount[ band_index(grid,1) ]++;
bandcount[ band_index(grid,2) ]++;
bandcount[ band_index(grid,3) ]++;
bandcount[ stack_index(grid,1) ]++;
bandcount[ stack_index(grid,2) ]++;
bandcount[ stack_index(grid,3) ]++;
if (ng%10000 == 0) {
patscore_t top[NTOP];
int patnr;
printf("progress: %d grids\n",ng);
for (i=0; i<NTOP; i++) top[i].score = top[i].bias = 0;
for (patnr=0; patnr<npats; patnr++) {
double score=0, bias;
for (i=0; i<416; i++) score += bandcount[i] * 1.0 * patcts[patnr][i];
bias = (score/ng)/mean[patnr] - 1;
score = (score-mean[patnr]*ng) / (sd[patnr]*sqrt(ng));
#define SORTBY score
if (fabs(SORTBY) > fabs(top[NTOP-1].SORTBY)) {
for (i=0; i<NTOP; i++) if (fabs(SORTBY)>fabs(top[i].SORTBY)) break;
for (j=NTOP-2; j>=i; j--) memcpy(&top[j+1],&top[j],sizeof(patscore_t));
top[i].score = score;
top[i].bias = bias;
top[i].pat = patnr;
}
}
printf(" +-----------------------------+----------+-----------+----------+\n");
printf(" | Pattern | Bias | Z-score | Z_1M |\n");
printf(" +-----------------------------+----------+-----------+----------+\n");
for (i=0; i<NTOP; i++) if (top[i].SORTBY) {
printf(" | ");
fwrite(patstrs[top[i].pat],1,27,stdout);
printf(" | %7.2f%% | %9.2f | %8.2f |\n",top[i].bias*100,top[i].score,top[i].score*sqrt(1000000.0/ng));
}
printf(" +-----------------------------+----------+-----------+----------+\n");
}
}
printf("processed %d grids\n",ng);
}
return 0;
}
... and the standard patterns file I use for the tests
(Or maybe not: "Your message contains 2989725 characters. The maximum number of allowed characters is 100000.". Can we host this on the forum another way?)
As with the unbiased grid generator, I've not compiled this in ages so would welcome bug reports. Ta.