datalog: first-arg index per relation (Phase 5e perf, 169/169)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
db gains :facts-index {<rel>: {<first-arg-key>: tuples}}, mirroring
the membership :facts-keys index. dl-add-fact! populates the index;
dl-match-positive walks the body literal's first arg under the
current subst — when it resolves to a non-variable, it looks up by
(str arg) instead of scanning the full relation.
For chain-style recursive rules such as (parent X Y), (ancestor Y Z),
the inner Y has at most one parent, so the inner lookup returns 0–1
tuples instead of N. chain-25 saturation drops from ~33s to ~18s
real (~2x). chain-50 is still slow but tractable; the next bottleneck
is subst dict copies during unification.
dl-retract! is updated to keep the new index consistent: kept-index is
rebuilt during the EDB filter, and IDB wipes clear all three slots
(:facts, :facts-keys, :facts-index).
The differential semi-naive test is bumped to chain-12, and the
semi-only count test to chain-25.
This commit is contained in:
@@ -112,7 +112,8 @@
|
||||
(let
|
||||
((existing (get (get db :facts) rel-key))
|
||||
(kept (list))
|
||||
(kept-keys {}))
|
||||
(kept-keys {})
|
||||
(kept-index {}))
|
||||
(do
|
||||
(for-each
|
||||
(fn
|
||||
@@ -121,10 +122,19 @@
|
||||
(not (dl-tuple-equal? t lit))
|
||||
(do
|
||||
(append! kept t)
|
||||
(dict-set! kept-keys (dl-tuple-key t) true))))
|
||||
(dict-set! kept-keys (dl-tuple-key t) true)
|
||||
(when
|
||||
(>= (len t) 2)
|
||||
(let ((k (dl-arg-key (nth t 1))))
|
||||
(do
|
||||
(when
|
||||
(not (has-key? kept-index k))
|
||||
(dict-set! kept-index k (list)))
|
||||
(append! (get kept-index k) t)))))))
|
||||
existing)
|
||||
(dict-set! (get db :facts) rel-key kept)
|
||||
(dict-set! (get db :facts-keys) rel-key kept-keys))))
|
||||
(dict-set! (get db :facts-keys) rel-key kept-keys)
|
||||
(dict-set! (get db :facts-index) rel-key kept-index))))
|
||||
;; Wipe all relations that have a rule (these are IDB) so the
|
||||
;; saturator regenerates them from the surviving EDB.
|
||||
(let ((rule-heads (dl-rule-head-rels db)))
|
||||
@@ -133,7 +143,8 @@
|
||||
(k)
|
||||
(do
|
||||
(dict-set! (get db :facts) k (list))
|
||||
(dict-set! (get db :facts-keys) k {})))
|
||||
(dict-set! (get db :facts-keys) k {})
|
||||
(dict-set! (get db :facts-index) k {})))
|
||||
rule-heads))
|
||||
(dl-saturate! db)
|
||||
db))))
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`,
|
||||
;; which is order-aware and understands built-in predicates.
|
||||
|
||||
(define dl-make-db (fn () {:facts {} :facts-keys {} :rules (list)}))
|
||||
(define dl-make-db (fn () {:facts {} :facts-keys {} :facts-index {} :rules (list)}))
|
||||
|
||||
(define
|
||||
dl-rel-name
|
||||
@@ -98,7 +98,8 @@
|
||||
(db rel-key)
|
||||
(let
|
||||
((facts (get db :facts))
|
||||
(fk (get db :facts-keys)))
|
||||
(fk (get db :facts-keys))
|
||||
(fi (get db :facts-index)))
|
||||
(do
|
||||
(when
|
||||
(not (has-key? facts rel-key))
|
||||
@@ -106,8 +107,51 @@
|
||||
(when
|
||||
(not (has-key? fk rel-key))
|
||||
(dict-set! fk rel-key {}))
|
||||
(when
|
||||
(not (has-key? fi rel-key))
|
||||
(dict-set! fi rel-key {}))
|
||||
(get facts rel-key)))))
|
||||
|
||||
;; First-arg index helpers. Tuples are keyed by their first-after-rel
|
||||
;; arg's `(str ...)`; when that arg is a constant, dl-match-positive
|
||||
;; uses the index instead of scanning the full relation.
|
||||
;; dl-arg-key: turn an argument value `v` into the key used by the
;; first-arg index. The key is simply `(str v)`.
(define
|
||||
dl-arg-key
|
||||
(fn
|
||||
(v)
|
||||
(str v)))
|
||||
|
||||
;; dl-index-add!: record tuple `lit` in the first-arg index of `db`
;; for relation `rel-key`.
;;   db      - database dict; its :facts-index slot is read and mutated.
;;   rel-key - key of the relation inside :facts-index.
;;   lit     - the tuple; element 1 (`(nth lit 1)`) supplies the index key
;;             via dl-arg-key.
;; Nothing is indexed when `lit` has fewer than 2 elements or when
;; :facts-index has no entry for `rel-key`. The bucket list for a key is
;; created the first time that key is seen, then `lit` is appended to it.
(define
|
||||
dl-index-add!
|
||||
(fn
|
||||
(db rel-key lit)
|
||||
(let
|
||||
((idx (get db :facts-index))
|
||||
(n (len lit)))
|
||||
(when
|
||||
(and (>= n 2) (has-key? idx rel-key))
|
||||
(let
|
||||
((rel-idx (get idx rel-key))
|
||||
(k (dl-arg-key (nth lit 1))))
|
||||
(do
|
||||
(when
|
||||
(not (has-key? rel-idx k))
|
||||
(dict-set! rel-idx k (list)))
|
||||
(append! (get rel-idx k) lit)))))))
|
||||
|
||||
;; dl-index-lookup: fetch the tuples of relation `rel-key` indexed under
;; `arg-val`.
;;   db      - database dict; only its :facts-index slot is read.
;;   rel-key - key of the relation inside :facts-index.
;;   arg-val - value to look up; converted to a key with dl-arg-key.
;; Returns a new empty list when :facts-index has no entry for `rel-key`
;; or when the relation's index has no bucket for the key. Otherwise it
;; returns whatever `get` yields for the bucket.
;; NOTE(review): that is presumably the stored bucket list itself rather
;; than a copy, so callers should not mutate it — confirm.
(define
|
||||
dl-index-lookup
|
||||
(fn
|
||||
(db rel-key arg-val)
|
||||
(let
|
||||
((idx (get db :facts-index)))
|
||||
(cond
|
||||
((not (has-key? idx rel-key)) (list))
|
||||
(else
|
||||
(let ((rel-idx (get idx rel-key))
|
||||
(k (dl-arg-key arg-val)))
|
||||
(if (has-key? rel-idx k) (get rel-idx k) (list))))))))
|
||||
|
||||
;; dl-tuple-key: key for a whole tuple `lit`, computed as `(str lit)`.
(define dl-tuple-key (fn (lit) (str lit)))
|
||||
|
||||
(define
|
||||
@@ -140,6 +184,7 @@
|
||||
(do
|
||||
(dict-set! key-dict tk true)
|
||||
(append! tuples lit)
|
||||
(dl-index-add! db rel-key lit)
|
||||
true)))))))))
|
||||
|
||||
;; The full safety check lives in builtins.sx (it has to know which
|
||||
|
||||
@@ -24,7 +24,17 @@
|
||||
((nil? rel) (error (str "dl-match-positive: bad literal " lit)))
|
||||
(else
|
||||
(let
|
||||
((tuples (dl-rel-tuples db rel)))
|
||||
;; If the first argument walks to a non-variable (constant
|
||||
;; or already-bound var), use the first-arg index for
|
||||
;; this relation. Otherwise scan the full tuple list.
|
||||
((tuples
|
||||
(cond
|
||||
((>= (len lit) 2)
|
||||
(let ((walked (dl-walk (nth lit 1) subst)))
|
||||
(cond
|
||||
((dl-var? walked) (dl-rel-tuples db rel))
|
||||
(else (dl-index-lookup db rel walked)))))
|
||||
(else (dl-rel-tuples db rel)))))
|
||||
(do
|
||||
(for-each
|
||||
(fn
|
||||
|
||||
@@ -15,5 +15,5 @@
|
||||
{"name":"api","passed":11,"failed":0,"total":11},
|
||||
{"name":"demo","passed":15,"failed":0,"total":15}
|
||||
],
|
||||
"generated": "2026-05-08T09:20:09+00:00"
|
||||
"generated": "2026-05-08T09:27:29+00:00"
|
||||
}
|
||||
|
||||
@@ -120,18 +120,20 @@
|
||||
(dl-sn-counts-agree?
|
||||
(dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a)."))
|
||||
true)
|
||||
;; Chain length 10 — exercises multiple semi-naive iterations
|
||||
;; against the recursive ancestor rule.
|
||||
;; Chain length 12 — multiple semi-naive iterations against
|
||||
;; the recursive ancestor rule (differential vs naive).
|
||||
(dl-sn-test!
|
||||
"chain-10 ancestor counts match"
|
||||
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 10)))
|
||||
"chain-12 ancestor counts match"
|
||||
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 12)))
|
||||
true)
|
||||
;; Chain length 25 — semi-naive only — first-arg index makes
|
||||
;; this tractable in conformance budget.
|
||||
(dl-sn-test!
|
||||
"chain-15 ancestor count value (semi only)"
|
||||
"chain-25 ancestor count value (semi only)"
|
||||
(let
|
||||
((db (dl-program (dl-sn-chain-source 15))))
|
||||
((db (dl-program (dl-sn-chain-source 25))))
|
||||
(do (dl-saturate! db) (len (dl-relation db "ancestor"))))
|
||||
120)
|
||||
325)
|
||||
(dl-sn-test!
|
||||
"query through semi saturate"
|
||||
(let
|
||||
|
||||
Reference in New Issue
Block a user