问题
I am trying to build a 6-tuple store on top of wiredtiger. The tuples can be described as follow:
(graph, subject, predicate, object, alive, transaction)
Every tuple stored in the database is unique.
Queries are like regular SPARQL queries except that the database store 6 tuples.
Zero of more elements of a tuple can be variable. Here is an example query that allows to retrieve all changes introduces by a particular transaction P4X432
:
SELECT ?graph ?subject ?predicate ?object ?alive
WHERE
{
?graph ?subject ?predicate ?object ?alive "P4X432"
}
Considering all possible patterns ends up with considering all combinations of:
(graph, subject, predicate, object, alive, transaction)
That is given by the following function:
def combinations(tab):
out = []
for i in range(1, len(tab) + 1):
out.extend(x for x in itertools.combinations(tab, i))
assert len(out) == 2**len(tab) - 1
return out
Where:
print(len(combinations(('graph', 'subject', 'predicate', 'object', 'alive', 'transaction'))))
Display:
63
That is there 63 combinations of the 6-tuples. I can complete each indices with the missing tuple item, e.g. the following combination:
('graph', 'predicate', 'transaction')
Will be associated with the following index:
('graph', 'predicate', 'transaction', 'subject', 'alive', 'object')
But I know there is a smaller subset of all permutations of the 6-tuple that has the following property:
A set of n-permutations of {1, 2, ..., n} where all combinations of {1, 2, ..., n} are prefix-permutation of at least one element of the set.
Otherwise said, all combinations have a permutation that is prefix of one element of the set.
I found using a brute force algorithm a set of size 25 (inferior to 63) that has that property:
((5 0 1 2 3 4) (4 5 0 1 2 3) (3 4 5 0 1 2) (2 3 4 5 0 1) (1 2 3 4 5 0) (0 1 2 3 4 5) (0 2 1 3 4 5) (0 3 2 1 5 4) (0 4 3 1 5 2) (0 4 2 3 1 5) (2 1 5 3 0 4) (3 2 1 5 0 4) (3 1 4 5 0 2) (3 1 5 4 2 0) (3 0 1 4 2 5) (3 5 2 0 1 4) (4 3 1 0 2 5) (4 2 1 5 3 0) (4 1 0 2 5 3) (4 5 2 1 0 3) (5 4 1 2 3 0) (5 3 0 1 4 2) (5 2 1 3 4 0) (5 1 2 4 0 3) (5 0 2 4 3 1))
Here is the r7rs scheme program I use to compute that solution:
(define-library (indices)
(export indices)
(export permutations)
(export combination)
(export combinations)
(export run)
(import (only (chezscheme) trace-define trace-lambda random trace-let))
(import (scheme base))
(import (scheme list))
(import (scheme comparator))
(import (scheme hash-table))
(import (scheme process-context))
(import (scheme write))
(begin
(define (combination k lst)
(cond
((= k 0) '(()))
((null? lst) '())
(else
(let ((head (car lst))
(tail (cdr lst)))
(append (map (lambda (y) (cons head y)) (combination (- k 1) tail))
(combination k tail))))))
(define (factorial n)
(let loop ((n n)
(out 1))
(if (= n 0)
out
(loop (- n 1) (* n out)))))
(define (%binomial-coefficient n k)
;; https://en.wikipedia.org/wiki/Binomial_coefficient#Multiplicative_formula
(let loop ((i 1)
(out 1))
(if (= i (+ k 1))
out
(loop (+ i 1) (* out (/ (- (+ n 1) i) i))))))
(define (memo proc)
(let ((m (make-hash-table (make-equal-comparator))))
(lambda args
(if (hash-table-contains? m args)
(hash-table-ref m args)
(let ((v (apply proc args)))
(hash-table-set! m args v)
v)))))
(define binomial-coefficient
(memo
(lambda (n k)
(cond
((= n k) 1)
((= k 0) 1)
(else (%binomial-coefficient n k))))))
;; k-combination ranking and unranking procedures according to
;; https://en.wikipedia.org/wiki/Combinatorial_number_system
(define (ranking lst)
(let loop ((lst (sort < lst)) ;; increasing sequence
(k 1)
(out 0))
(if (null? lst)
out
(loop (cdr lst) (+ k 1) (+ out (binomial-coefficient (car lst) k))))))
(define (%unranking k N)
(let loop ((n (- k 1)))
(if (< N (binomial-coefficient (+ n 1) k))
n
(loop (+ n 1)))))
(define (unranking k N)
(let loop ((k k)
(N N)
(out '()))
(if (= k 0)
out
(let ((m (%unranking k N)))
(loop (- k 1) (- N (binomial-coefficient m k)) (cons m out))))))
(define fresh-random
(let ((memo (make-hash-table (make-eqv-comparator))))
(lambda (n)
(when (= (hash-table-size memo) n)
(error 'oops "no more fresh number" n
))
(let loop ()
(let ((r (random n)))
(if (hash-table-contains? memo r)
(loop)
(begin (hash-table-set! memo r #t) r)))))))
(define (random-k-combination k n)
(unranking k (fresh-random (binomial-coefficient n k))))
(define (combinations lst)
(if (null? lst) '(())
(let* ((head (car lst))
(tail (cdr lst))
(s (combinations tail))
(v (map (lambda (x) (cons head x)) s)))
(append s v))))
;; (define (combinations lst)
;; (append-map (lambda (k) (combination k lst)) (iota (length lst))))
(define (permutations s)
;; http://rosettacode.org/wiki/Permutations#Scheme
(cond
((null? s) '(()))
((null? (cdr s)) (list s))
(else ;; extract each item in list in turn and permutations the rest
(let splice ((l '()) (m (car s)) (r (cdr s)))
(append
(map (lambda (x) (cons m x)) (permutations (append l r)))
(if (null? r) '()
(splice (cons m l) (car r) (cdr r))))))))
(define (shift lst index)
(append (drop lst index) (take lst index)))
(define (rotations lst)
(reverse! (map (lambda (index) (shift lst index)) (iota (length lst)))))
(define (prefix? lst other)
"Return #t if LST is prefix of OTHER"
(let prefix ((lst lst)
(other other))
(if (null? lst)
#t
(if (= (car lst) (car other))
(prefix (cdr lst) (cdr other))
#f))))
(define (indices lst)
(let ((candidates (permutations lst)))
(let loop ((out (rotations lst)) ;; all rotations are solutions
(combinations (combinations lst)))
(if (null? combinations)
(reverse! out)
(let ((permutations (permutations (car combinations))))
(if (any (lambda (s) (any (lambda (p) (prefix? p s)) permutations)) out)
;; there is an existing "solution" for the
;; permutations of COMBINATION move to the next
;; combination
(loop out (cdr combinations))
(loop (cons (find (lambda (c) (if (member c out)
#f
(any (lambda (p) (prefix? p c)) permutations)))
candidates)
out)
(cdr combinations))))))))
(define (permutation-prefix? c o)
(any (lambda (p) (prefix? p o)) (permutations c)))
(define (ok? combinations candidate)
(every (lambda (c) (any (lambda (p) (permutation-prefix? c p)) candidate)) combinations))
(define (run)
(let* ((n (string->number (cadr (command-line))))
(N (iota n))
(solution (indices N))
(min (length solution))
(rotations (rotations N))
(R (length rotations))
;; other stuff
(cx (combinations N))
(px (filter (lambda (x) (not (member x rotations))) (permutations N)))
;; other other stuff
(pn (length px))
(PN (iota pn)))
(display "(length solution) => ") (display (length solution))
(display "\n")
(display "(length rotations) => ") (display R)
(display "\n")
(let try ((x (- (length solution) 1)))
(let ((count (binomial-coefficient pn (- x R))))
(let loop ((index 0)
(cxx (map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn))))
(when (= (modulo index (expt 10 5)) 0)
(display "n=") (display n) (display " x=") (display x)
(display " ")
(display index) (display "/") (display count) (display "\n"))
(let ((candidate (append rotations cxx)))
(let ((continue? (not (ok? cx candidate))))
(if continue?
(loop (+ index 1)
(map (lambda (x) (list-ref px x)) (random-k-combination (- x R) pn)))
(begin (display "new solution n=") (display n)
(display " length=") (display x)
(display " ") (display candidate)
(display "\n")
(try (- x 1)))))))))))
))
With that list of permutations I can query any pattern.
I am wondering if there is a smaller set and whether there is definitive algorithm to compute that kind of set.
回答1:
Based on this answer https://math.stackexchange.com/a/3146793/23663
The following program yields a solution that is a minimal solution according to math ™:
import itertools
import math
f = math.factorial
bc = lambda n, k: f(n) // f(k) // f(n-k) if k<n else 0
def pk(*args):
print(*args)
return args[-1]
def stringify(iterable):
return ''.join(str(x) for x in iterable)
def combinations(tab):
out = []
for i in range(1, len(tab) + 1):
out.extend(stringify(x) for x in itertools.combinations(tab, i))
assert len(out) == 2**len(tab) - 1
return out
def ok(solutions, tab):
cx = combinations(tab)
px = [stringify(x) for x in itertools.permutations(tab)]
for combination in cx:
pcx = [''.join(x) for x in itertools.permutations(combination)]
# check for existing solution
for solution in solutions:
if any(solution.startswith(p) for p in pcx):
# yeah, there is an existing solution
break
else:
print('failed with combination={}'.format(combination))
break
else:
return True
return False
def run(n):
tab = list(range(n))
cx = list(itertools.combinations(tab, n//2))
for c in cx:
L = [(i, i in c) for i in tab]
A = []
B = []
while True:
for i in range(len(L) - 1):
if (not L[i][1]) and L[i + 1][1]:
A.append(L[i + 1][0])
B.append(L[i][0])
L.remove((L[i + 1][0], True))
L.remove((L[i][0], False))
break
else:
break
l = [i for (i, _) in L]
yield A + l + B
for i in range(7):
tab = stringify(range(i))
solutions = [stringify(x) for x in run(i)]
assert ok(solutions, tab)
print("n={}, size={}, solutions={}".format(i, len(solutions), solutions))
The above program output is:
n=0, size=1, solutions=['']
n=1, size=1, solutions=['0']
n=2, size=2, solutions=['01', '10']
n=3, size=3, solutions=['012', '120', '201']
n=4, size=6, solutions=['0123', '2031', '3012', '1230', '1302', '2310']
n=5, size=10, solutions=['01234', '20341', '30142', '40123', '12340', '13402', '14203', '23410', '24013', '34021']
n=6, size=20, solutions=['012345', '301452', '401253', '501234', '203451', '240513', '250314', '340521', '350124', '450132', '123450', '142503', '152304', '134502', '135024', '145032', '234510', '235104', '245130', '345210']
来源:https://stackoverflow.com/questions/55143485/what-is-a-smallest-set-of-indices-that-allows-to-fully-bind-any-pattern-of-6-tup