datalog: first-arg index per relation (Phase 5e perf, 169/169)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
db gains :facts-index {<rel>: {<first-arg-key>: tuples}} mirroring
the membership :facts-keys index. dl-add-fact! populates the index;
dl-match-positive walks the body literal's first arg under the
current subst — when it's bound to a non-var, look up by (str arg)
instead of scanning the full relation.
For chain-style recursive rules (parent X Y), (ancestor Y Z) the
inner Y has at most one parent, so the inner lookup returns 0–1
tuples instead of N. chain-25 saturation drops from ~33s to ~18s
real (~2x). chain-50 still long but tractable; next bottleneck is
subst dict copies during unification.
dl-retract! refreshed to keep the new index consistent: kept-index
rebuilt during EDB filter, IDB wipes clear all three slots.
Differential semi-naive test bumped to chain-12, semi-only count
test to chain-25.
This commit is contained in:
@@ -112,7 +112,8 @@
|
||||
(let
|
||||
((existing (get (get db :facts) rel-key))
|
||||
(kept (list))
|
||||
(kept-keys {}))
|
||||
(kept-keys {})
|
||||
(kept-index {}))
|
||||
(do
|
||||
(for-each
|
||||
(fn
|
||||
@@ -121,10 +122,19 @@
|
||||
(not (dl-tuple-equal? t lit))
|
||||
(do
|
||||
(append! kept t)
|
||||
(dict-set! kept-keys (dl-tuple-key t) true))))
|
||||
(dict-set! kept-keys (dl-tuple-key t) true)
|
||||
(when
|
||||
(>= (len t) 2)
|
||||
(let ((k (dl-arg-key (nth t 1))))
|
||||
(do
|
||||
(when
|
||||
(not (has-key? kept-index k))
|
||||
(dict-set! kept-index k (list)))
|
||||
(append! (get kept-index k) t)))))))
|
||||
existing)
|
||||
(dict-set! (get db :facts) rel-key kept)
|
||||
(dict-set! (get db :facts-keys) rel-key kept-keys))))
|
||||
(dict-set! (get db :facts-keys) rel-key kept-keys)
|
||||
(dict-set! (get db :facts-index) rel-key kept-index))))
|
||||
;; Wipe all relations that have a rule (these are IDB) so the
|
||||
;; saturator regenerates them from the surviving EDB.
|
||||
(let ((rule-heads (dl-rule-head-rels db)))
|
||||
@@ -133,7 +143,8 @@
|
||||
(k)
|
||||
(do
|
||||
(dict-set! (get db :facts) k (list))
|
||||
(dict-set! (get db :facts-keys) k {})))
|
||||
(dict-set! (get db :facts-keys) k {})
|
||||
(dict-set! (get db :facts-index) k {})))
|
||||
rule-heads))
|
||||
(dl-saturate! db)
|
||||
db))))
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`,
|
||||
;; which is order-aware and understands built-in predicates.
|
||||
|
||||
(define dl-make-db (fn () {:facts {} :facts-keys {} :rules (list)}))
|
||||
;; dl-make-db: build a fresh, empty database dict. It starts with empty
;; dicts under :facts, :facts-keys and :facts-index, and an empty list
;; under :rules.
(define dl-make-db (fn () {:facts {} :facts-keys {} :facts-index {} :rules (list)}))
|
||||
|
||||
(define
|
||||
dl-rel-name
|
||||
@@ -98,7 +98,8 @@
|
||||
(db rel-key)
|
||||
(let
|
||||
((facts (get db :facts))
|
||||
(fk (get db :facts-keys)))
|
||||
(fk (get db :facts-keys))
|
||||
(fi (get db :facts-index)))
|
||||
(do
|
||||
(when
|
||||
(not (has-key? facts rel-key))
|
||||
@@ -106,8 +107,51 @@
|
||||
(when
|
||||
(not (has-key? fk rel-key))
|
||||
(dict-set! fk rel-key {}))
|
||||
(when
|
||||
(not (has-key? fi rel-key))
|
||||
(dict-set! fi rel-key {}))
|
||||
(get facts rel-key)))))
|
||||
|
||||
;; First-arg index helpers. Tuples are keyed by their first-after-rel
|
||||
;; arg's `(str ...)`; when a body literal's first arg walks to a non-variable (constant or bound var), dl-match-positive
|
||||
;; uses the index instead of scanning the full relation.
|
||||
;; dl-arg-key: turn an argument value `v` into its first-arg index key
;; by rendering it with `str`. dl-index-add! and dl-index-lookup both
;; call this, so the stored and queried keys are produced the same way.
;; NOTE(review): distinct values that render to the same string would
;; share a bucket — presumably harmless if callers still unify every
;; candidate tuple; confirm.
(define
|
||||
dl-arg-key
|
||||
(fn
|
||||
(v)
|
||||
(str v)))
|
||||
|
||||
;; dl-index-add!: append tuple `lit` to the first-arg index bucket of
;; relation `rel-key` inside `(get db :facts-index)`.
;;   db      - database dict; its :facts-index entry is read and mutated.
;;   rel-key - key of the relation within :facts-index.
;;   lit     - the tuple; element 1 (the "first-after-rel" arg) is keyed
;;             through dl-arg-key.
;; Does nothing when `lit` has fewer than 2 elements or when
;; :facts-index has no entry for `rel-key`.
;; NOTE(review): the second case is silent — a tuple added while the
;; relation has no index entry will never be found by dl-index-lookup.
;; Presumably callers create the entry first; confirm.
(define
|
||||
dl-index-add!
|
||||
(fn
|
||||
(db rel-key lit)
|
||||
(let
|
||||
((idx (get db :facts-index))
|
||||
(n (len lit)))
|
||||
(when
|
||||
;; Need an arg at position 1 and an existing per-relation index.
(and (>= n 2) (has-key? idx rel-key))
|
||||
(let
|
||||
((rel-idx (get idx rel-key))
|
||||
(k (dl-arg-key (nth lit 1))))
|
||||
(do
|
||||
;; Create the bucket on first use of this key, then append.
(when
|
||||
(not (has-key? rel-idx k))
|
||||
(dict-set! rel-idx k (list)))
|
||||
(append! (get rel-idx k) lit)))))))
|
||||
|
||||
;; dl-index-lookup: fetch the tuples of relation `rel-key` whose
;; first-arg key equals `(dl-arg-key arg-val)`.
;;   db      - database dict; only its :facts-index entry is read.
;;   rel-key - key of the relation within :facts-index.
;;   arg-val - the value to look up; keyed through dl-arg-key.
;; Returns the bucket stored under that key, or a new empty list when
;; either the relation or the key is absent from the index.
;; NOTE(review): on a hit this appears to hand back the stored bucket
;; itself rather than a copy — callers presumably must not mutate it;
;; confirm.
(define
|
||||
dl-index-lookup
|
||||
(fn
|
||||
(db rel-key arg-val)
|
||||
(let
|
||||
((idx (get db :facts-index)))
|
||||
(cond
|
||||
;; Relation was never indexed: no candidates.
((not (has-key? idx rel-key)) (list))
|
||||
(else
|
||||
(let ((rel-idx (get idx rel-key))
|
||||
(k (dl-arg-key arg-val)))
|
||||
(if (has-key? rel-idx k) (get rel-idx k) (list))))))))
|
||||
|
||||
;; dl-tuple-key: membership key for a whole tuple `lit` — its `str`
;; rendering. Used as the dict key when recording tuples in the
;; per-relation :facts-keys set.
(define dl-tuple-key (fn (lit) (str lit)))
|
||||
|
||||
(define
|
||||
@@ -140,6 +184,7 @@
|
||||
(do
|
||||
(dict-set! key-dict tk true)
|
||||
(append! tuples lit)
|
||||
(dl-index-add! db rel-key lit)
|
||||
true)))))))))
|
||||
|
||||
;; The full safety check lives in builtins.sx (it has to know which
|
||||
|
||||
@@ -24,7 +24,17 @@
|
||||
((nil? rel) (error (str "dl-match-positive: bad literal " lit)))
|
||||
(else
|
||||
(let
|
||||
((tuples (dl-rel-tuples db rel)))
|
||||
;; If the first argument walks to a non-variable (constant
|
||||
;; or already-bound var), use the first-arg index for
|
||||
;; this relation. Otherwise scan the full tuple list.
|
||||
((tuples
|
||||
(cond
|
||||
((>= (len lit) 2)
|
||||
(let ((walked (dl-walk (nth lit 1) subst)))
|
||||
(cond
|
||||
((dl-var? walked) (dl-rel-tuples db rel))
|
||||
(else (dl-index-lookup db rel walked)))))
|
||||
(else (dl-rel-tuples db rel)))))
|
||||
(do
|
||||
(for-each
|
||||
(fn
|
||||
|
||||
@@ -15,5 +15,5 @@
|
||||
{"name":"api","passed":11,"failed":0,"total":11},
|
||||
{"name":"demo","passed":15,"failed":0,"total":15}
|
||||
],
|
||||
"generated": "2026-05-08T09:20:09+00:00"
|
||||
"generated": "2026-05-08T09:27:29+00:00"
|
||||
}
|
||||
|
||||
@@ -120,18 +120,20 @@
|
||||
(dl-sn-counts-agree?
|
||||
(dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a)."))
|
||||
true)
|
||||
;; Chain length 10 — exercises multiple semi-naive iterations
|
||||
;; against the recursive ancestor rule.
|
||||
;; Chain length 12 — multiple semi-naive iterations against
|
||||
;; the recursive ancestor rule (differential vs naive).
|
||||
(dl-sn-test!
|
||||
"chain-10 ancestor counts match"
|
||||
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 10)))
|
||||
"chain-12 ancestor counts match"
|
||||
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 12)))
|
||||
true)
|
||||
;; Chain length 25 — semi-naive only — first-arg index makes
|
||||
;; this tractable within the conformance budget.
|
||||
(dl-sn-test!
|
||||
"chain-15 ancestor count value (semi only)"
|
||||
"chain-25 ancestor count value (semi only)"
|
||||
(let
|
||||
((db (dl-program (dl-sn-chain-source 15))))
|
||||
((db (dl-program (dl-sn-chain-source 25))))
|
||||
(do (dl-saturate! db) (len (dl-relation db "ancestor"))))
|
||||
120)
|
||||
325)
|
||||
(dl-sn-test!
|
||||
"query through semi saturate"
|
||||
(let
|
||||
|
||||
@@ -264,19 +264,33 @@ large graphs.
|
||||
|
||||
## Blockers
|
||||
|
||||
- **Saturation perf** improving but not free. Resolved hash-set
|
||||
membership in `dl-add-fact!` and replaced recursive `(rest lits)` in
|
||||
`dl-find-bindings` with indexed iteration. chain-15 drops from ~25s
|
||||
to ~16s and chain-25 saturates in ~33s real / 11s user — most CPU
|
||||
now in unification (assoc-based subst dict copies) and dict
|
||||
lookups during walks. Future: a per-rule "compiled" body that
|
||||
pre-resolves arg positions and intern variable indices, then
|
||||
unification can use array slots instead of dict assoc.
|
||||
- **Saturation perf**: three rounds done.
|
||||
- hash-set membership in `dl-add-fact!` (Phase 5b)
|
||||
- indexed iteration in `dl-find-bindings` (Phase 5c)
|
||||
- first-arg index per relation (Phase 5e) — when a body literal's
|
||||
first arg walks to a non-variable, dl-match-positive looks up
|
||||
by `(str arg)` instead of scanning the full relation.
|
||||
chain-25 saturation drops from ~33s to ~18s real (10s user).
|
||||
chain-50 still long (~120s+) due to dict-copy overhead in
|
||||
unification subst threading. Future: per-rule "compiled" body
|
||||
with pre-resolved var positions and a slot-based subst representation
|
||||
to avoid `assoc` per binding.
|
||||
|
||||
## Progress log
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-08 — Phase 5e perf: first-arg index per relation. db gains
|
||||
`:facts-index {<rel>: {<first-arg-key>: tuples}}` mirroring the
|
||||
existing `:facts-keys` membership index. `dl-add-fact!` populates
|
||||
it; `dl-match-positive` walks the body literal's first arg under
|
||||
the current subst — if it's bound to a non-var, look up by
|
||||
`(str arg)` and iterate only the matching subset. chain-25
|
||||
saturation 33s → 18s real (~2x). chain-50 still slow (~120s+)
|
||||
but tractable; next bottleneck is subst dict copies during
|
||||
unification. Differential test bumped to chain-12, semi-only
|
||||
count to chain-25.
|
||||
|
||||
- 2026-05-08 — Demo: tag co-occurrence. `(cotagged P T1 T2)` — post
|
||||
has both T1 and T2 with T1 != T2 — and `(tag-pair-count T1 T2 N)`
|
||||
counting posts per distinct tag pair. Demonstrates count
|
||||
|
||||
Reference in New Issue
Block a user