datalog: first-arg index per relation (Phase 5e perf, 169/169)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s

db gains :facts-index {<rel>: {<first-arg-key>: tuples}} mirroring
the membership :facts-keys index. dl-add-fact! populates the index;
dl-match-positive walks the body literal's first arg under the
current subst — when it's bound to a non-var, look up by (str arg)
instead of scanning the full relation.

For chain-style recursive rules (parent X Y), (ancestor Y Z) the
inner Y has at most one parent, so the inner lookup returns 0–1
tuples instead of N. chain-25 saturation drops from ~33s to ~18s
real (~2x). chain-50 still long but tractable; next bottleneck is
subst dict copies during unification.

dl-retract! refreshed to keep the new index consistent: kept-index
rebuilt during EDB filter, IDB wipes clear all three slots.

Differential semi-naive test bumped to chain-12, semi-only count
test to chain-25.
This commit is contained in:
2026-05-08 09:27:44 +00:00
parent c7315f5877
commit cc64ec5cf2
6 changed files with 105 additions and 23 deletions

View File

@@ -112,7 +112,8 @@
(let
((existing (get (get db :facts) rel-key))
(kept (list))
(kept-keys {}))
(kept-keys {})
(kept-index {}))
(do
(for-each
(fn
@@ -121,10 +122,19 @@
(not (dl-tuple-equal? t lit))
(do
(append! kept t)
(dict-set! kept-keys (dl-tuple-key t) true))))
(dict-set! kept-keys (dl-tuple-key t) true)
(when
(>= (len t) 2)
(let ((k (dl-arg-key (nth t 1))))
(do
(when
(not (has-key? kept-index k))
(dict-set! kept-index k (list)))
(append! (get kept-index k) t)))))))
existing)
(dict-set! (get db :facts) rel-key kept)
(dict-set! (get db :facts-keys) rel-key kept-keys))))
(dict-set! (get db :facts-keys) rel-key kept-keys)
(dict-set! (get db :facts-index) rel-key kept-index))))
;; Wipe all relations that have a rule (these are IDB) so the
;; saturator regenerates them from the surviving EDB.
(let ((rule-heads (dl-rule-head-rels db)))
@@ -133,7 +143,8 @@
(k)
(do
(dict-set! (get db :facts) k (list))
(dict-set! (get db :facts-keys) k {})))
(dict-set! (get db :facts-keys) k {})
(dict-set! (get db :facts-index) k {})))
rule-heads))
(dl-saturate! db)
db))))

View File

@@ -12,7 +12,7 @@
;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`,
;; which is order-aware and understands built-in predicates.
(define dl-make-db (fn () {:facts {} :facts-keys {} :rules (list)}))
;; Build an empty database dict. :facts, :facts-keys and :facts-index
;; are each keyed by relation (tuple list, membership dict and
;; first-arg buckets respectively); :rules starts as an empty list.
(define dl-make-db (fn () {:facts {} :facts-keys {} :facts-index {} :rules (list)}))
(define
dl-rel-name
@@ -98,7 +98,8 @@
(db rel-key)
(let
((facts (get db :facts))
(fk (get db :facts-keys)))
(fk (get db :facts-keys))
(fi (get db :facts-index)))
(do
(when
(not (has-key? facts rel-key))
@@ -106,8 +107,51 @@
(when
(not (has-key? fk rel-key))
(dict-set! fk rel-key {}))
(when
(not (has-key? fi rel-key))
(dict-set! fi rel-key {}))
(get facts rel-key)))))
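;; First-arg index helpers. Each stored tuple is bucketed under the
;; `(str ...)` of its first argument after the relation name. When a
;; body literal's first arg walks to a non-variable (a constant or an
;; already-bound var), dl-match-positive reads the matching bucket
;; instead of scanning the full relation.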
;; Bucket key for an argument value: its string form.
;; NOTE(review): values that stringify identically would share a
;; bucket; presumably harmless because callers still unify every
;; candidate tuple, but confirm against dl-match-positive.
(define dl-arg-key (fn (v) (str v)))
;; Record `lit` in the first-arg index of relation `rel-key`.
;;   db      - database dict; its :facts-index slot is read and mutated
;;   rel-key - relation key under which the tuple is stored
;;   lit     - the tuple; element 1 is the argument used as bucket key
;; Nothing happens when `lit` has fewer than two elements or when
;; `rel-key` has no entry in :facts-index yet.
(define
  dl-index-add!
  (fn
    (db rel-key lit)
    (let
      ((all-indexes (get db :facts-index)))
      (when
        (and (>= (len lit) 2) (has-key? all-indexes rel-key))
        (let
          ((buckets (get all-indexes rel-key))
           (bucket-key (dl-arg-key (nth lit 1))))
          (do
            ;; First tuple for this key: start an empty bucket.
            (when
              (not (has-key? buckets bucket-key))
              (dict-set! buckets bucket-key (list)))
            (append! (get buckets bucket-key) lit)))))))
;; Fetch the tuples of relation `rel-key` whose first argument has the
;; same key as `arg-val`.
;;   db      - database dict; only its :facts-index slot is read
;;   rel-key - relation key to look under
;;   arg-val - value whose `dl-arg-key` selects the bucket
;; Returns the bucket stored in the index, or an empty list when either
;; the relation or the key has no index entry.
(define
  dl-index-lookup
  (fn
    (db rel-key arg-val)
    (let
      ((all-indexes (get db :facts-index)))
      (if
        (has-key? all-indexes rel-key)
        (let
          ((buckets (get all-indexes rel-key))
           (bucket-key (dl-arg-key arg-val)))
          (cond
            ((has-key? buckets bucket-key) (get buckets bucket-key))
            (else (list))))
        (list)))))
;; Membership key for a tuple: the string form of the whole literal.
(define
  dl-tuple-key
  (fn
    (lit)
    (str lit)))
(define
@@ -140,6 +184,7 @@
(do
(dict-set! key-dict tk true)
(append! tuples lit)
(dl-index-add! db rel-key lit)
true)))))))))
;; The full safety check lives in builtins.sx (it has to know which

View File

@@ -24,7 +24,17 @@
((nil? rel) (error (str "dl-match-positive: bad literal " lit)))
(else
(let
((tuples (dl-rel-tuples db rel)))
;; If the first argument walks to a non-variable (constant
;; or already-bound var), use the first-arg index for
;; this relation. Otherwise scan the full tuple list.
((tuples
(cond
((>= (len lit) 2)
(let ((walked (dl-walk (nth lit 1) subst)))
(cond
((dl-var? walked) (dl-rel-tuples db rel))
(else (dl-index-lookup db rel walked)))))
(else (dl-rel-tuples db rel)))))
(do
(for-each
(fn

View File

@@ -15,5 +15,5 @@
{"name":"api","passed":11,"failed":0,"total":11},
{"name":"demo","passed":15,"failed":0,"total":15}
],
"generated": "2026-05-08T09:20:09+00:00"
"generated": "2026-05-08T09:27:29+00:00"
}

View File

@@ -120,18 +120,20 @@
(dl-sn-counts-agree?
(dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a)."))
true)
;; Chain length 10 exercises multiple semi-naive iterations
;; against the recursive ancestor rule.
;; Chain length 12 — multiple semi-naive iterations against
;; the recursive ancestor rule (differential vs naive).
(dl-sn-test!
"chain-10 ancestor counts match"
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 10)))
"chain-12 ancestor counts match"
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 12)))
true)
;; Chain length 25 — semi-naive only — first-arg index makes
;; this tractable in conformance budget.
(dl-sn-test!
"chain-15 ancestor count value (semi only)"
"chain-25 ancestor count value (semi only)"
(let
((db (dl-program (dl-sn-chain-source 15))))
((db (dl-program (dl-sn-chain-source 25))))
(do (dl-saturate! db) (len (dl-relation db "ancestor"))))
120)
325)
(dl-sn-test!
"query through semi saturate"
(let

View File

@@ -264,19 +264,33 @@ large graphs.
## Blockers
- **Saturation perf** improving but not free. Resolved hash-set
membership in `dl-add-fact!` and replaced recursive `(rest lits)` in
`dl-find-bindings` with indexed iteration. chain-15 drops from ~25s
to ~16s and chain-25 saturates in ~33s real / 11s user — most CPU
now in unification (assoc-based subst dict copies) and dict
lookups during walks. Future: a per-rule "compiled" body that
pre-resolves arg positions and intern variable indices, then
unification can use array slots instead of dict assoc.
- **Saturation perf**: three rounds done.
- hash-set membership in `dl-add-fact!` (Phase 5b)
- indexed iteration in `dl-find-bindings` (Phase 5c)
- first-arg index per relation (Phase 5e) — when a body literal's
first arg walks to a non-variable, dl-match-positive looks up
by `(str arg)` instead of scanning the full relation.
chain-25 saturation drops from ~33s to ~18s real (10s user).
chain-50 is still slow (~120s+) due to dict-copy overhead in
unification subst threading. Future: a per-rule "compiled" body
with pre-resolved var positions, plus a slot-based subst
representation to avoid an `assoc` per binding.
## Progress log
_Newest first._
- 2026-05-08 — Phase 5e perf: first-arg index per relation. db gains
`:facts-index {<rel>: {<first-arg-key>: tuples}}` mirroring the
existing `:facts-keys` membership index. `dl-add-fact!` populates
it; `dl-match-positive` walks the body literal's first arg under
the current subst — if it's bound to a non-var, look up by
`(str arg)` and iterate only the matching subset. chain-25
saturation 33s → 18s real (~2x). chain-50 still slow (~120s+)
but tractable; next bottleneck is subst dict copies during
unification. Differential test bumped to chain-12, semi-only
count to chain-25.
- 2026-05-08 — Demo: tag co-occurrence. `(cotagged P T1 T2)` — post
has both T1 and T2 with T1 != T2 — and `(tag-pair-count T1 T2 N)`
counting posts per distinct tag pair. Demonstrates count