Some experience on UA collection.
McGuire's method for short-sized UA collecting is very efficient for grid w/o forced givens but gives very limited results for subsets/supersets problem.
There are very efficient algorithms for UA4, for UA6, for 2-digits UA, but all of them are inapplicable too.
Collection of all 4-digit UA gives some results. It is based on exhaustive UA collection over all subgrids consisting of all combinations of all instances of 5 given digits, plus the forced givens. For more forced givens, 5-digit UA can be collected in a similar way, but for 0 forced givens this process is inefficient.
Exhaustive UA collection for forced givens + some random number of floating givens also works. For 81 floating givens the best result is for around 10 000 attempts with random 27-clue subgrid. It is difficult to predict the optimal number of attempts and number of random clues (only within the floating ones) for other situations.
I use 2 similar methods to collect all UA within a subgrid. It is essentially identification of unhit UA for a multiple-solution puzzle, within the context of one of its solutions.
The first method is closer to the topic. It assumes a fixed limit of minimal UA, currently 1 million in my code, but unlimited number of solutions to process.
The solver is called, and every solution is compared to the original grid so that a (potentially non-minimal) UA is identified. The obtained UA is checked against all already known UA and is a) discarded when non-minimal - another UA which is its subset is already known, or b) added, and all its supersets are identified as non-minimal and marked for later deletion.
Periodically the list of identified UA is purged of the entries marked as invalid.
Below is part of the code.
- Code: Select all
//Collects the minimal unavoidable sets (UA) of a solution grid that are
//confined to a chosen subset of "unknown" cells, by enumerating the
//solutions of the grid with those cells cleared (see runSolver/addSolution).
class uaCollector {
public:
//theGrid: the reference solution grid (must outlive the collector);
//unknowns/unknownsSize: indexes of the cells to clear and search for UA.
uaCollector(grid *theGrid, const unsigned char *unknowns, const int unknownsSize);
//uaCollector(grid *theGrid);
~uaCollector();
//void byMorphs();
//solver callback for each found solution; returns true only to stop the
//solver when uaArray overflows
bool addSolution(const char *sol);
//bool addMorph(const char *sol);
//clears the unknown cells and runs the solver; returns the solution count
unsigned long long runSolver();
//transfers the surviving (non-invalidated) UA into g.usetsBySize
void addUaToGrid();
static const int maxSolutions = 1000000; //capacity of uaArray
//static const int maxSolutions = 200000;
grid &g; //the reference solution grid
const unsigned char *unknowns; //cell indexes searched for UA (not owned)
const int unknownsSize; //number of entries in unknowns
int uniqueSize; //number of UA currently stored in uaArray
int uaMaxSize; //UA larger than this are skipped (from opt.uaOpt->maxuasize)
int firstInvalidUa; //lowest invalidated index; maxSolutions = "none"
int numInvalidUas; //invalidated entries since the last compression
bool uaLimitedBySize; //true when uaMaxSize < 81, enables early size cutoff
bm128 *uaArray; //UA bitmaps, owned; allocated/freed via bm128 helpers
};
//Binds the collector to a grid and its set of floating ("unknown") cells.
//firstInvalidUa starts at the maxSolutions sentinel, meaning "no invalid
//entries yet".
uaCollector::uaCollector(grid *theGrid, const unsigned char *theUnknowns, const int theUnknownsSize)
: g(*theGrid), unknowns(theUnknowns), unknownsSize(theUnknownsSize), uniqueSize(0), firstInvalidUa(maxSolutions), numInvalidUas(0) {
uaArray = bm128::allocate(maxSolutions); //TODO: reduce the array size
uaMaxSize = opt.uaOpt->maxuasize; //global option: size cutoff for collected UA
uaLimitedBySize = (uaMaxSize < 81); //81 = whole grid, i.e. effectively no limit
}
//Releases the UA array; the grid and the unknowns list are not owned.
uaCollector::~uaCollector() {
bm128::deallocate(uaArray);
}
//Solver callback, invoked for every solution except the original grid
//(see runSolver). Derives a UA candidate as the set of cells where the
//solution differs from the reference grid, discards it if a known UA is
//its subset (it would be non-minimal), and marks known supersets of it
//for later deletion.
//Returns true only when uaArray is full and cannot be compressed — this
//signals the solver to stop; false means "continue enumerating".
bool uaCollector::addSolution(const char *sol) {
//compose bitmask with differences between original grid & solution
bm128 us;
us.clear();
int uaSize = 0;
if(uaLimitedBySize) {
//size-limited variant: bail out as soon as the candidate exceeds uaMaxSize
for(int i = 0; i < unknownsSize; i++) {
if(sol[unknowns[i]] != g.digits[unknowns[i]]) {
us.setBit(unknowns[i]);
if(++uaSize > uaMaxSize) {
return false; //skip larger UA
}
}
}
}
else {
//no size limit: collect all differing cells
for(int i = 0; i < unknownsSize; i++) {
if(sol[unknowns[i]] != g.digits[unknowns[i]]) {
us.setBit(unknowns[i]);
}
}
}
//do some tests against the already collected UA
for(int y = 0; y < uniqueSize; y++) {
if(uaArray[y].isSubsetOf(us)) {
return false; //skip supersets & duplicates
}
if(us.isSubsetOf(uaArray[y])) {
//the stored UA is a superset of the new one, hence non-minimal
if(!uaArray[y].isInvalid()) {
uaArray[y].invalidate(); //mark the superset (many can exist)
if(firstInvalidUa > y)
firstInvalidUa = y;
numInvalidUas++;
}
}
}
//passed the tests, add at the end of list
if(numInvalidUas > 500 || uniqueSize >= maxSolutions) {
//attempt compressing the uaArray by removing the invalid elements;
//entries below firstInvalidUa are all valid and stay in place
int compressedSize = firstInvalidUa;
for(int i = firstInvalidUa + 1; i < uniqueSize; i++) {
if(!uaArray[i].isInvalid()) { //skip marked
uaArray[compressedSize++] = uaArray[i];
}
}
if(compressedSize >= maxSolutions) {
//array still full after compression: return error, stopping the solver
return true;
}
uniqueSize = compressedSize;
firstInvalidUa = maxSolutions; //sentinel: no invalid entries left
numInvalidUas = 0;
//fprintf(stderr, "*");
}
uaArray[uniqueSize++] = us;
return false;
}
unsigned long long uaCollector::runSolver() {
	//Build a multiple-solution puzzle from the reference grid by erasing
	//the floating cells, then enumerate its solutions; the solver invokes
	//addSolution() for every solution except the original grid.
	char puzzle[81];
	for(int cell = 0; cell < 81; cell++) {
		puzzle[cell] = g.digits[cell]; //start from the solved grid
	}
	for(int k = 0; k < unknownsSize; k++) {
		puzzle[unknowns[k]] = 0; //erase the cells of interest
	}
	return solve(g.gridBM, puzzle, this);
}
void uaCollector::addUaToGrid() {
uset ua;
for(int x = 0; x < uniqueSize; x++) {
if(!uaArray[x].isInvalid()) { //skip marked
ua = uaArray[x];
ua.positionsByBitmap();
g.usetsBySize.insert(ua);
}
}
}
The second method assumes all solutions are known (by solving the puzzle but storing the solutions in a buffer), and there is no preferable solution grid (which is not the case here; the code comes from large-UA generation, where we have no fixed solution grid but are interested in the UA themselves). This method could easily be adapted to a fixed preferable grid. I am including it because a) there are additional SIMD optimizations included, and b) the collected UA are also periodically sorted so that the shorter UA, which have a better chance to kill other UA as non-minimal, are positioned at the top.
The code.
- Code: Select all
//An 81-byte grid held in six 128-bit registers for SIMD byte comparison.
//Only the first byte of bytes[5] is meaningful (cell 80).
struct bm96 {
bm128 bytes[6];
//Loads an 81-byte grid: the first 80 bytes via five unaligned 16-byte
//loads, the last cell as a scalar so no memory past the 81-byte buffer
//is ever read.
void load(const char *p) {
bytes[0].loadUnaligned(p + 0 * 16);
bytes[1].loadUnaligned(p + 1 * 16);
bytes[2].loadUnaligned(p + 2 * 16);
bytes[3].loadUnaligned(p + 3 * 16);
bytes[4].loadUnaligned(p + 4 * 16);
bytes[5].bitmap128.m128i_u8[0] = *(p + 5 * 16); //cell 80 (the 81st byte), don't read after the possible buffer end
}
//Computes the difference bitmap against the 81-byte grid at p: bit i of
//dif is set when the grids differ at cell i.
//NOTE(review): assumes diffOctets() returns a 16-bit mask with one bit
//per differing byte — consistent with the <<16 packing below; confirm
//against the bm128 implementation.
void diff(const char *p, bm128& dif) const {
bm128 pp;
dif.clear();
pp.loadUnaligned(p + 0 * 16);
dif.bitmap128.m128i_u32[0] = bytes[0].diffOctets(pp); //cells 0..15
pp.loadUnaligned(p + 1 * 16);
dif.bitmap128.m128i_u32[0] |= (bytes[1].diffOctets(pp) << 16); //cells 16..31
pp.loadUnaligned(p + 2 * 16);
dif.bitmap128.m128i_u32[1] = bytes[2].diffOctets(pp); //cells 32..47
pp.loadUnaligned(p + 3 * 16);
dif.bitmap128.m128i_u32[1] |= (bytes[3].diffOctets(pp) << 16); //cells 48..63
pp.loadUnaligned(p + 4 * 16);
dif.bitmap128.m128i_u32[2] = bytes[4].diffOctets(pp); //cells 64..79
//pp.loadUnaligned(p + 5 * 16);
//dif.bitmap128.m128i_u32[2] |= (bytes[5].diffOctets(pp) << 16);
//cell 80 is compared as a scalar to avoid reading past the buffer
if(*(p + 5 * 16) != bytes[5].bitmap128.m128i_u8[0])
dif.bitmap128.m128i_u32[2] |= (1 << 16);
}
};
//Collects UA from a pre-computed solution list: for each solution s in
//solList (nSol concatenated 81-byte grids of raw digit values), compares
//it against every other solution, keeps the minimal difference sets, and
//prints the UA whose size lies within [minuasize, maxuasize]. Large UA
//are additionally probed for high "valency" (number of solutions of the
//puzzle formed by clearing the UA cells).
//Each OpenMP iteration owns a private uaArray; only printing is serialized.
void findUaBySolutions(const char *solList, const int nSol) {
int uaMaxSize = opt.uaOpt->maxuasize;
int uaMinSize = opt.uaOpt->minuasize;
bool uaLimitedBySize = (uaMaxSize < 81);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
for(int s = 0; s < nSol; s++) {
//bm128* uaArray = bm128::allocate(nSol);
sizedUset* uaArray = (sizedUset*)bm128::allocate(nSol); //per-iteration UA storage
const char *sp = &solList[s * 81]; //the reference solution for this iteration
bm96 sp96;
sp96.load(sp); //SIMD-friendly copy of the reference solution
//for(sp = solList; sp < send; sp += 81) {
int nUa = 0;
int numInvalidUas = 0;
int firstInvalidUa = nSol; //sentinel: no invalid entries yet
const char *ssp, *send = solList + 81 * nSol;
for(ssp = solList; ssp < send; ssp += 81) {
if(ssp == sp)
continue; //don't compare to itself
//uaArray[nUa] doubles as scratch for the candidate; nUa is only
//incremented once the candidate passes all tests
sizedUset &us = uaArray[nUa];
us.clear();
if(uaLimitedBySize) {
int uaSize = 0;
for(int i = 0; i < 81; i++) {
if(ssp[i] != sp[i]) {
us.setBit(i);
if(++uaSize > uaMaxSize) {
goto nextSol; //skip large UA
}
}
}
}
else {
//compare in parallel (SIMD byte diff over all 81 cells)
sp96.diff(ssp, us);
//for(int i = 0; i < 81; i++) {
// if(ssp[i] != sp[i]) {
// us.setBit(i);
// }
//}
}
//do some tests against the already collected UA
for(int y = 0; y < nUa; y++) {
if(uaArray[y].isSubsetOf(us)) {
goto nextSol; //skip supersets & duplicates
}
if(us.isSubsetOf(uaArray[y])) {
//the stored UA is a superset of the new one, hence non-minimal
if(!uaArray[y].isInvalid()) {
uaArray[y].invalidate(); //mark the superset (many can exist)
if(firstInvalidUa > y)
firstInvalidUa = y;
numInvalidUas++;
}
}
}
//passed the tests; cache the popcount and commit the scratch slot
us.setSize();
nUa++;
// compress uaArray if necessary
//if(numInvalidUas > 120) { //37"
//if(numInvalidUas > 5) { //slow"
if(numInvalidUas > 15) { //23" — empirically tuned threshold
//if(numInvalidUas > 20) { //23"
//attempt compressing the uaArray by removing the invalid elements
int compressedSize = firstInvalidUa;
for(int i = firstInvalidUa + 1; i < nUa; i++) {
if(!uaArray[i].isInvalid()) { //skip marked
uaArray[compressedSize++] = uaArray[i];
}
}
nUa = compressedSize;
firstInvalidUa = nSol; //reset sentinel
numInvalidUas = 0;
//std::sort(uaArray, uaArray + nUa, isBm128Smaller);
//keep shorter UA first: they kill more candidates as non-minimal
std::sort(uaArray, uaArray + nUa, sizedUset::isSmaller);
}
//if(0 == nUa % 60) { //163"(10000, 60/60), 140"(10000, 15)
// std::sort(uaArray, uaArray + nUa, isBm128Smaller);
//}
nextSol:
;
}
//at this point we have uaArray populated, with possible invalid UA
//#ifdef _OPENMP
//#pragma omp critical
//#endif
for(int y = 0; y < nUa; y++) {
sizedUset &u = uaArray[y];
if(!u.isInvalid()) {
//int uaSize = 0;
//int uaSize = u.popcount_128();
int uaSize = u.getSize();
#if 1
//hunting large UA sets at this point we have a stream of many many minimal UA
//let hunt for high-valency UA too
const int min_valency = 16;
const int min_valency_size = 20; //check large UA only
char ip[81]; //inverted multiple-solution puzzle
int valency = 0;
if(uaSize >= min_valency_size) {
//build the puzzle that keeps everything except the UA cells,
//then count its solutions = valency of the UA
for(int i = 0; i < 81; i++) {
if(u.isBitSet(i)) {
ip[i] = 0;
}
else {
ip[i] = sp[i];
}
}
valency = (int)solve(ip, INT_MAX);
}
if(uaSize >= uaMinSize || valency >= min_valency) {
#else
if(uaSize >= uaMinSize) {
#endif
//output the UA: digits on UA cells, '.' elsewhere
char p[81];
for(int i = 0; i < 81; i++) {
if(u.isBitSet(i)) {
p[i] = sp[i] + '0';
}
else {
p[i] = '.';
}
}
#ifdef _OPENMP
#pragma omp critical
#endif
{
if(uaSize >= uaMinSize) {
printf("%81.81s\n", p);
}
#if 1
if(valency >= min_valency) {
printf("%81.81s@%d\n", p, valency); //high-valency UA, annotated with its valency
}
#endif
fflush(NULL);
}
}
}
}
bm128::deallocate(uaArray);
}
}
The best exhaustive UA collection process based on the above methods should be a mixture of both - targeted on a preferred grid, with SIMD, with sorting by size, and w/o parallelization.
If hitting UA is done by bitmap indexes, the process of indexing is actually transposition of bit matrix and efficient SIMD algorithm is implemented in the example code for minimization, see the reference in my previous post.
[edit: few typos fixed]