;; lib/host/otel.sx — OpenTelemetry in SX, P1: the span model + API. ;; ;; A TRACE is a COMPOSITION: a span is {:trace :span :parent :name :t0 :t1 :attrs ;; :events}, so a trace is a tree of spans (the same shape as an object's :body ;; composition). Later phases fold that tree into a waterfall (render-fold), OTLP ;; JSON (export-fold) and metrics (aggregate-fold). This phase gives us the model, ;; the dynamic parent stack that builds the tree, and a bounded in-memory ring ;; buffer — spans are cheap timed effects, NOT durable KV rows. ;; ── monotonic id + clock ───────────────────────────────────────────── ;; A simple process-monotonic counter gives collision-free ids without needing a ;; random source (Math.random/Date.now aren't available on this host). (define otel/-id-counter 0) (define otel/-next-id (fn (prefix) (begin (set! otel/-id-counter (+ otel/-id-counter 1)) (str prefix otel/-id-counter)))) (define otel/gen-trace-id (fn () (otel/-next-id "trace-"))) (define otel/gen-span-id (fn () (otel/-next-id "span-"))) ;; now-ns — real epoch time in NANOSECONDS (the unit OTLP wants for ;; start/endTimeUnixNano). The OCaml host exposes `clock-milliseconds` ;; (Unix.gettimeofday, epoch ms); we scale by 1e6. Wall clocks can step ;; backwards (NTP), so we clamp against a high-water mark: now-ns never ;; decreases, keeping span durations non-negative even across a clock step. (define otel/-last-ns 0) (define otel/now-ns (fn () (let ((raw (* (clock-milliseconds) 1000000))) (let ((t (if (> raw otel/-last-ns) raw otel/-last-ns))) (begin (set! otel/-last-ns t) t))))) ;; ── the dynamic parent stack ───────────────────────────────────────── ;; head = the innermost (current) span context {:span id :trace id}. Pushing on ;; with-span entry / popping on exit is what turns lexical nesting into parent ;; links, so a trace tree falls out of ordinary call nesting. (define otel/-stack (list)) (define otel/-push! (fn (ctx) (set! otel/-stack (cons ctx otel/-stack)))) (define otel/-pop! (fn () (set! otel/-stack (rest otel/-stack)))) (define otel/-top (fn () (if (= (len otel/-stack) 0) nil (first otel/-stack)))) (define otel/current-span (fn () (let ((t (otel/-top))) (if t (get t :span) nil)))) (define otel/current-trace (fn () (let ((t (otel/-top))) (if t (get t :trace) nil)))) ;; ── the bounded ring buffer ────────────────────────────────────────── ;; Oldest → newest. record! appends and drops from the front once over cap, so the ;; buffer stays O(cap) no matter how many spans flow through. NOT persisted. (define otel/-cap 1000) (define otel/-ring (list)) (define otel/set-cap! (fn (n) (begin (set! otel/-cap n) nil))) (define otel/record! (fn (span) (begin (set! otel/-ring (append otel/-ring (list span))) (if (> (len otel/-ring) otel/-cap) (set! otel/-ring (slice otel/-ring (- (len otel/-ring) otel/-cap))) nil) span))) (define otel/recent (fn () otel/-ring)) ;; Clear the ring AND the parent stack — used between tests / requests. (define otel/reset! (fn () (begin (set! otel/-ring (list)) (set! otel/-stack (list)) nil))) ;; ── -timed: the shared timed-effect core ───────────────────────────── ;; Records a span around (thunk): a fresh span id, the trace inherited from the ;; enclosing span (or a new trace at the root), the enclosing span as :parent, and ;; t0/t1 straddling the call. Pushes its context so nested spans see it as parent, ;; pops after, then records the finished span. `finalize` maps the thunk's result ;; to EXTRA attrs merged onto `attrs` (e.g. an http.status only known post-call). ;; Returns the thunk's value unchanged. (define otel/-timed (fn (name attrs finalize thunk) (let ((parent-ctx (otel/-top))) (let ((trace (if parent-ctx (get parent-ctx :trace) (otel/gen-trace-id))) (span (otel/gen-span-id)) (parent (if parent-ctx (get parent-ctx :span) nil))) (let ((t0 (begin (otel/-push! {:span span :trace trace}) (otel/now-ns)))) ;; guard the body: on a raise, pop + record an ERROR span (status + ;; exception event) as a SIDE EFFECT in a clause test that returns false, ;; so the condition auto-propagates to the outer handler. (An explicit ;; `(raise e)` inside a guard handler re-enters this guard and hangs.) (guard (e ((begin (let ((t1 (otel/now-ns))) (begin (otel/-pop!) (otel/record! {:trace trace :span span :parent parent :name name :t0 t0 :t1 t1 :status "error" :attrs attrs :events (list {:name "exception" :time t1 :message (str e)})}))) false) nil)) (let ((result (thunk))) (let ((t1 (otel/now-ns))) (begin (otel/-pop!) (otel/record! {:trace trace :span span :parent parent :name name :t0 t0 :t1 t1 :status "ok" :attrs (merge attrs (finalize result)) :events (list)}) result))))))))) ;; with-span — a plain timed span; no result-derived attrs. (define otel/with-span (fn (name attrs thunk) (otel/-timed name attrs (fn (r) {}) thunk))) ;; ── auto-instrument HTTP handlers ───────────────────────────────────── ;; A trace is a composition; a request handler is a timed effect. Wrap one Dream ;; route's handler so dispatching it records a root span "METHOD /route" with ;; {:http.method :http.route :http.status}. The status is only known after the ;; handler runs, so it's a finalize attr; the result is coerced the same way the ;; router coerces it, so a bare-string handler still reports 200. The raw handler ;; result is returned untouched so dr/run-route's own coercion is unchanged. (define otel/instrument-route (fn (r) (let ((method (dream-route-method r)) (path (dream-route-path r)) (handler (dream-route-handler r))) (dream-route method path (fn (req) (otel/-timed (str method " " path) {:http.method method :http.route path} (fn (resp) {:http.status (dream-status (dream-coerce-response resp))}) (fn () (handler req)))))))) ;; Flatten nested route groups and instrument each route — the host/make-app seam. (define otel/instrument-routes (fn (routes) (map otel/instrument-route (dr/flatten-routes routes)))) ;; ── P4: render-fold → SVG waterfall ─────────────────────────────────── ;; A trace is a tree of spans; a waterfall is a fold over that tree into SVG (the ;; same discipline as the compose render-fold: walk the structure, emit per node). ;; Layout: x ∝ (t0 − trace.t0), width ∝ duration, y ∝ depth (ancestor count via ;; parent links). Geometry is computed as plain rect data first (otel/waterfall- ;; rects, testable), then folded into markup (otel/waterfall). (define otel/-row-h 18) (define otel/-svg-w 640) (define otel/-pad 4) ;; all spans belonging to one trace, from the ring. (define otel/trace-spans (fn (trace-id) (filter (fn (s) (= (get s :trace) trace-id)) (otel/recent)))) (define otel/-span-by-id (fn (spans id) (first (filter (fn (s) (= (get s :span) id)) spans)))) ;; depth = number of ancestors reachable via :parent within this trace's spans. (define otel/-depth (fn (s spans) (if (nil? (get s :parent)) 0 (let ((p (otel/-span-by-id spans (get s :parent)))) (if (nil? p) 0 (+ 1 (otel/-depth p spans))))))) ;; trace start = earliest t0; end = latest t1 (folds over the spans). (define otel/-min-t0 (fn (spans) (reduce (fn (m s) (min m (get s :t0))) (get (first spans) :t0) spans))) (define otel/-max-t1 (fn (spans) (reduce (fn (m s) (max m (get s :t1))) (get (first spans) :t1) spans))) ;; a rect per span: {:span :name :depth :x :y :w}. x/width are scaled to the ;; drawable width; a zero-duration span still gets a 1px sliver so it's visible. (define otel/waterfall-rects (fn (trace-id) (let ((spans (otel/trace-spans trace-id))) (if (empty? spans) (list) (let ((t0 (otel/-min-t0 spans)) (t1 (otel/-max-t1 spans)) (iw (- otel/-svg-w (* 2 otel/-pad)))) (let ((total (if (> (- t1 t0) 0) (- t1 t0) 1))) (map (fn (s) (let ((d (otel/-depth s spans)) (dur (- (get s :t1) (get s :t0)))) (let ((raw-w (* (/ dur total) iw))) {:span (get s :span) :name (get s :name) :depth d :dur dur :status (get s :status) :x (+ otel/-pad (* (/ (- (get s :t0) t0) total) iw)) :y (+ otel/-pad (* d otel/-row-h)) :w (if (> raw-w 1) raw-w 1)}))) spans))))))) ;; ns → a compact "Nms" / "Nus" label. (define otel/-ms (fn (ns) (if (>= ns 1000000) (str (quotient ns 1000000) "ms") (str (quotient ns 1000) "us")))) ;; error spans render red, everything else teal. (define otel/-bar-fill (fn (status) (if (= status "error") "#e45756" "#4c9a8f"))) ;; one (bar + name + duration) per rect. The bar is the only , so ;; count(rect) == count(span); axis chrome uses /. (define otel/-rect->g (fn (r) (quasiquote (g (rect :x (unquote (get r :x)) :y (unquote (get r :y)) :width (unquote (get r :w)) :height (unquote (- otel/-row-h 2)) :fill (unquote (otel/-bar-fill (get r :status))) :rx 2) (text :x (unquote (+ (get r :x) 3)) :y (unquote (+ (get r :y) 12)) :font-size 10 :fill "#ffffff" (unquote (str (get r :name) " " (otel/-ms (get r :dur))))))))) ;; max depth across rects (0 when empty) → svg height. (define otel/-max-depth (fn (rects) (reduce (fn (m r) (max m (get r :depth))) 0 rects))) ;; the trace as an inline waterfall. (define otel/waterfall (fn (trace-id) (let ((rects (otel/waterfall-rects trace-id))) (let ((h (+ (* 2 otel/-pad) (* (+ (otel/-max-depth rects) 1) otel/-row-h)))) (quasiquote (svg :width (unquote otel/-svg-w) :height (unquote h) :xmlns "http://www.w3.org/2000/svg" (splice-unquote (map otel/-rect->g rects)))))))) ;; count nodes whose head symbol prints as `head` — a small SVG-tree assertion aid. (define otel/-tree-count (fn (tree head) (if (list? tree) (let ((self (if (and (not (empty? tree)) (= (str (first tree)) head)) 1 0))) (reduce (fn (acc n) (+ acc (otel/-tree-count n head))) self tree)) 0))) ;; ── P5: metrics (aggregate-fold) ────────────────────────────────────── ;; Fold the recent spans into per-route request counts + a latency histogram ;; (p50/p95/p99 from durations). No `sort` primitive here, so percentiles ride a ;; tiny insertion sort; nearest-rank keeps the maths exact for the tests. (define otel/-insert (fn (x sorted) (if (empty? sorted) (list x) (if (<= x (first sorted)) (cons x sorted) (cons (first sorted) (otel/-insert x (rest sorted))))))) (define otel/-sort-nums (fn (lst) (reduce (fn (acc x) (otel/-insert x acc)) (list) lst))) ;; nearest-rank percentile of an ASCENDING list: rank = ceil(p/100 · N), 1-based. (define otel/-percentile (fn (sorted p) (if (empty? sorted) 0 (let ((n (len sorted))) (let ((idx (- (ceil (* (/ p 100) n)) 1))) (let ((i (if (< idx 0) 0 (if (>= idx n) (- n 1) idx)))) (nth sorted i))))))) ;; a span's route label: the http.route attr, else the span name. (define otel/-span-route (fn (s) (or (get (get s :attrs) :http.route) (get s :name)))) (define otel/-span-dur (fn (s) (- (get s :t1) (get s :t0)))) ;; distinct values, order-preserving. (define otel/-distinct (fn (lst) (reduce (fn (acc x) (if (some (fn (y) (= y x)) acc) acc (append acc (list x)))) (list) lst))) ;; the aggregate for one route: count + latency percentiles over its durations. (define otel/route-metrics (fn (spans route) (let ((rs (filter (fn (s) (= (otel/-span-route s) route)) spans))) (let ((durs (otel/-sort-nums (map otel/-span-dur rs)))) {:route route :count (len rs) :p50 (otel/-percentile durs 50) :p95 (otel/-percentile durs 95) :p99 (otel/-percentile durs 99)})))) ;; fold spans → {:total-requests N :routes (per-route metrics …)}. (define otel/metrics (fn (spans) (let ((routes (otel/-distinct (map otel/-span-route spans)))) {:total-requests (len spans) :routes (map (fn (r) (otel/route-metrics spans r)) routes)}))) ;; convenience: metrics over the current ring. (define otel/metrics-recent (fn () (otel/metrics (otel/recent)))) ;; ── P6: live dashboard (SSR + SSE) ──────────────────────────────────── ;; GET /otel renders a dashboard — metrics strip + the latest trace's waterfall + ;; a recent-traces list — as server-rendered HTML carrying Datastar-style reactive ;; attributes that subscribe to GET /otel/stream, the SSE feed of new span events ;; (SXTP events, the host's Datastar-borrowed wire format). SSR + declarative ;; reactive attrs + SSE patches IS the reactive-island model here. (Live client ;; hydration is a deploy concern; SSR, the event feed, and the data are tested.) ;; recent traces, newest-first: {:trace :name :spans}. root name = the parentless ;; span (fallback: first recorded). (define otel/-trace-ids (fn () (otel/-distinct (map (fn (s) (get s :trace)) (otel/recent))))) (define otel/-trace-root-name (fn (trace-id) (let ((spans (otel/trace-spans trace-id))) (let ((roots (filter (fn (s) (nil? (get s :parent))) spans))) (cond ((not (empty? roots)) (get (first roots) :name)) ((not (empty? spans)) (get (first spans) :name)) (else "")))))) (define otel/trace-summary (fn (trace-id) {:trace trace-id :name (otel/-trace-root-name trace-id) :spans (len (otel/trace-spans trace-id))})) (define otel/recent-traces (fn () (reverse (map otel/trace-summary (otel/-trace-ids))))) (define otel/latest-trace (fn () (let ((r (otel/recent))) (if (empty? r) nil (get (last r) :trace))))) ;; ── SSE span events (SXTP) ──────────────────────────────────────────── (define otel/span-event (fn (s) (sxtp/event "otel.span" {:id (get s :span) :body {:trace (get s :trace) :span (get s :span) :parent (get s :parent) :name (get s :name) :t0 (get s :t0) :t1 (get s :t1) :attrs (get s :attrs)} :time (get s :t1)}))) (define otel/latest-span-event (fn () (let ((r (otel/recent))) (if (empty? r) nil (otel/span-event (last r)))))) ;; one SSE frame: `event: otel.span\n data: \n\n`; "" when no spans. (define otel/-stream-body (fn () (let ((e (otel/latest-span-event))) (if (nil? e) "" (str "event: otel.span\ndata: " (sxtp/serialize e) "\n\n"))))) ;; ── dashboard markup (plain HTML tags → render-to-html SSRs cleanly) ── (define otel/-metrics-strip (fn (m) (quasiquote (table :class "otel-metrics" (tr (th "route") (th "count") (th "p50") (th "p95") (th "p99")) (splice-unquote (map (fn (r) (quasiquote (tr (td (unquote (str (get r :route)))) (td (unquote (str (get r :count)))) (td (unquote (str (get r :p50)))) (td (unquote (str (get r :p95)))) (td (unquote (str (get r :p99))))))) (get m :routes))))))) (define otel/-traces-list (fn (traces) (quasiquote (ul :class "otel-traces" (splice-unquote (map (fn (t) (quasiquote (li :data-trace (unquote (str (get t :trace))) (unquote (str (get t :name) " — " (get t :spans) " spans"))))) traces)))))) ;; a per-route latency bar chart: each row is a nested bar — teal p50 inside amber ;; p95 inside light-red p99 — so the tail (red beyond amber) is visible at a glance. (define otel/-latency-chart (fn (m) (let ((routes (get m :routes))) (if (empty? routes) (quasiquote (p :class "otel-empty" "no metrics yet")) (let ((maxp99 (reduce (fn (mx r) (max mx (get r :p99))) 1 routes)) (label-w 150) (bar-w 340) (row-h 24)) (quasiquote (svg :width 560 :height (unquote (+ (* (len routes) row-h) 6)) :xmlns "http://www.w3.org/2000/svg" :font-family "monospace" (splice-unquote (map-indexed (fn (i r) (let ((y (* i row-h))) (let ((p99w (max 1 (* (/ (get r :p99) maxp99) bar-w))) (p95w (max 1 (* (/ (get r :p95) maxp99) bar-w))) (p50w (max 1 (* (/ (get r :p50) maxp99) bar-w)))) (quasiquote (g (text :x 0 :y (unquote (+ y 15)) :font-size 11 :fill "#333" (unquote (get r :route))) (rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p99w) :height 14 :fill "#f2c2c2" :rx 3) (rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p95w) :height 14 :fill "#f2a63b" :rx 3) (rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p50w) :height 14 :fill "#4c9a8f" :rx 3) (text :x (unquote (+ label-w p99w 5)) :y (unquote (+ y 15)) :font-size 9 :fill "#999" (unquote (str "p99 " (otel/-ms (get r :p99)))))))))) routes))))))))) (define otel/dashboard (fn () (let ((m (otel/metrics (otel/recent))) (lt (otel/latest-trace)) (traces (otel/recent-traces))) (quasiquote (div :id "otel-dashboard" ;; The host serves single-body responses (no server-push SSE), so the ;; dashboard stays live by reloading itself — every 3s it re-renders the ;; latest metrics + trace. /otel/stream remains a snapshot of the newest ;; span for any client that wants to poll it. (meta :http-equiv "refresh" :content "3") (h1 "OpenTelemetry") (p :style "font-size:0.8em;opacity:0.7" "live · auto-refreshes every 3s") (h2 "latency by route") (p :style "font-size:0.75em;opacity:0.7" (span :style "color:#4c9a8f" "▉ p50") " " (span :style "color:#f2a63b" "▉ p95") " " (span :style "color:#f2c2c2" "▉ p99 (tail)")) (unquote (otel/-latency-chart m)) (h2 "metrics") (unquote (otel/-metrics-strip m)) (h2 "latest trace") (unquote (if (nil? lt) (quasiquote (p :class "otel-empty" "no traces yet")) (otel/waterfall lt))) (h2 "recent traces") (unquote (otel/-traces-list traces))))))) ;; ── routes ──────────────────────────────────────────────────────────── (define otel/dashboard-route (dream-get "/otel" (fn (req) (dream-html (render-to-html (otel/dashboard) {}))))) (define otel/stream-route (dream-get "/otel/stream" (fn (req) (dream-response 200 {:content-type "text/event-stream"} (otel/-stream-body))))) (define otel/routes (list otel/dashboard-route otel/stream-route)) ;; ── P7: OTLP-JSON export ────────────────────────────────────────────── ;; Serialize spans to the OTLP/JSON schema (resourceSpans → scopeSpans → spans) ;; for interop with Jaeger/Grafana. traceId/spanId/parentSpanId are hex strings; ;; timestamps are uint64-as-string; attributes are typed {key,value}. Export is a ;; fold (span → OTLP span); POST goes through an INJECTED transport so it's ;; testable without a live collector. ;; our "trace-N"/"span-N" ids → a fixed-width lowercase-hex id (real OTLP widths: ;; traceId 16 bytes = 32 hex, spanId 8 bytes = 16 hex). Deterministic from the id. (define otel/-id-num (fn (id) (let ((n (string->number (last (split id "-"))))) (if (nil? n) 0 n)))) (define otel/-zeros (fn (k) (if (<= k 0) "" (str "0" (otel/-zeros (- k 1)))))) (define otel/-pad-hex (fn (n width) (let ((h (number->string n 16))) (str (otel/-zeros (- width (len h))) h)))) (define otel/-trace-hex (fn (id) (otel/-pad-hex (otel/-id-num id) 32))) (define otel/-span-hex (fn (id) (otel/-pad-hex (otel/-id-num id) 16))) ;; a value → OTLP AnyValue: numbers are intValue (uint64-as-string), else stringValue. (define otel/-otlp-value (fn (v) (if (= (type-of v) "number") {:intValue (str v)} {:stringValue (str v)}))) (define otel/-otlp-attrs (fn (attrs) (map (fn (k) {:key (str k) :value (otel/-otlp-value (get attrs k))}) (keys attrs)))) ;; span kind: 2 = SERVER for an instrumented request (has http.method), else ;; 1 = INTERNAL. (define otel/-otlp-kind (fn (attrs) (if (get attrs :http.method) 2 1))) (define otel/-otlp-span (fn (s) (let ((attrs (or (get s :attrs) {}))) (let ((base {:traceId (otel/-trace-hex (get s :trace)) :spanId (otel/-span-hex (get s :span)) :name (get s :name) :kind (otel/-otlp-kind attrs) :startTimeUnixNano (str (get s :t0)) :endTimeUnixNano (str (get s :t1)) :attributes (otel/-otlp-attrs attrs)})) (if (nil? (get s :parent)) base (assoc base :parentSpanId (otel/-span-hex (get s :parent)))))))) ;; spans → the OTLP export envelope (a JSON-shaped SX value). (define otel/export-otlp (fn (spans) {:resourceSpans (list {:resource {:attributes (list {:key "service.name" :value {:stringValue "rose-ash-host"}})} :scopeSpans (list {:scope {:name "otel-on-sx" :version "0.1.0"} :spans (map otel/-otlp-span spans)})})})) (define otel/export-otlp-json (fn (spans) (dream-json-encode (otel/export-otlp spans)))) ;; POST the OTLP payload through an injected transport. `transport` is a fn taking ;; a request dict {:method :url :headers :body} — real deploys pass an http POST; ;; tests pass a recorder. Returns whatever the transport returns. (define otel/post-otlp (fn (endpoint spans transport) (transport {:method "POST" :url endpoint :headers {:content-type "application/json"} :body (otel/export-otlp-json spans)}))) ;; ── P8: W3C traceparent context propagation ─────────────────────────── ;; traceparent = "---" (RFC: version 00, ;; trace-id 32 hex, parent-id 16 hex, flags 2 hex; low bit = sampled). Emitting it ;; on outbound inter-service calls and parsing it on inbound requests lets ONE ;; trace span multiple services. Our internal ids hex-encode via the OTLP helpers. (define otel/format-traceparent (fn (trace-id span-id) (str "00-" (otel/-trace-hex trace-id) "-" (otel/-span-hex span-id) "-01"))) ;; the traceparent for the CURRENT span (nil outside any span) — for outbound calls. (define otel/current-traceparent (fn () (let ((t (otel/-top))) (if (nil? t) nil (otel/format-traceparent (get t :trace) (get t :span)))))) ;; parse an inbound traceparent → {:version :trace-id :parent-id :flags :sampled}; ;; nil if absent or malformed (wrong field count / hex widths). (define otel/parse-traceparent (fn (header) (if (or (nil? header) (= header "")) nil (let ((parts (split header "-"))) (if (= (len parts) 4) (let ((ver (nth parts 0)) (tid (nth parts 1)) (pid (nth parts 2)) (flags (nth parts 3))) (if (and (= (len tid) 32) (= (len pid) 16)) {:version ver :trace-id tid :parent-id pid :flags flags :sampled (not (= flags "00"))} nil)) nil)))))