693 lines
31 KiB
Plaintext
693 lines
31 KiB
Plaintext
;; lib/host/otel.sx — OpenTelemetry in SX, P1: the span model + API.
|
||
;;
|
||
;; A TRACE is a COMPOSITION: a span is {:trace :span :parent :name :t0 :t1 :attrs
|
||
;; :events}, so a trace is a tree of spans (the same shape as an object's :body
|
||
;; composition). Later phases fold that tree into a waterfall (render-fold), OTLP
|
||
;; JSON (export-fold) and metrics (aggregate-fold). This phase gives us the model,
|
||
;; the dynamic parent stack that builds the tree, and a bounded in-memory ring
|
||
;; buffer — spans are cheap timed effects, NOT durable KV rows.
|
||
|
||
;; ── monotonic id + clock ─────────────────────────────────────────────
|
||
;; A simple process-monotonic counter gives collision-free ids without needing a
|
||
;; random source (Math.random/Date.now aren't available on this host).
|
||
(define otel/-id-counter 0)
|
||
(define otel/-next-id
|
||
(fn (prefix)
|
||
(begin
|
||
(set! otel/-id-counter (+ otel/-id-counter 1))
|
||
(str prefix otel/-id-counter))))
|
||
(define otel/gen-trace-id (fn () (otel/-next-id "trace-")))
|
||
(define otel/gen-span-id (fn () (otel/-next-id "span-")))
|
||
|
||
;; now-ns — real epoch time in NANOSECONDS (the unit OTLP wants for
|
||
;; start/endTimeUnixNano). The OCaml host exposes `clock-milliseconds`
|
||
;; (Unix.gettimeofday, epoch ms); we scale by 1e6. Wall clocks can step
|
||
;; backwards (NTP), so we clamp against a high-water mark: now-ns never
|
||
;; decreases, keeping span durations non-negative even across a clock step.
|
||
(define otel/-last-ns 0)
|
||
(define otel/now-ns
|
||
(fn ()
|
||
(let ((raw (* (clock-milliseconds) 1000000)))
|
||
(let ((t (if (> raw otel/-last-ns) raw otel/-last-ns)))
|
||
(begin
|
||
(set! otel/-last-ns t)
|
||
t)))))
|
||
|
||
;; ── the dynamic parent stack ─────────────────────────────────────────
|
||
;; head = the innermost (current) span context {:span id :trace id}. Pushing on
|
||
;; with-span entry / popping on exit is what turns lexical nesting into parent
|
||
;; links, so a trace tree falls out of ordinary call nesting.
|
||
(define otel/-stack (list))
|
||
(define otel/-push! (fn (ctx) (set! otel/-stack (cons ctx otel/-stack))))
|
||
(define otel/-pop! (fn () (set! otel/-stack (rest otel/-stack))))
|
||
(define otel/-top (fn () (if (= (len otel/-stack) 0) nil (first otel/-stack))))
|
||
|
||
(define otel/current-span
|
||
(fn () (let ((t (otel/-top))) (if t (get t :span) nil))))
|
||
(define otel/current-trace
|
||
(fn () (let ((t (otel/-top))) (if t (get t :trace) nil))))
|
||
|
||
;; ── the bounded ring buffer ──────────────────────────────────────────
|
||
;; Oldest → newest. record! appends and drops from the front once over cap, so the
|
||
;; buffer stays O(cap) no matter how many spans flow through. NOT persisted.
|
||
(define otel/-cap 1000)
|
||
(define otel/-ring (list))
|
||
(define otel/set-cap! (fn (n) (begin (set! otel/-cap n) nil)))
|
||
|
||
(define otel/record!
|
||
(fn (span)
|
||
(begin
|
||
(set! otel/-ring (append otel/-ring (list span)))
|
||
(if (> (len otel/-ring) otel/-cap)
|
||
(set! otel/-ring (slice otel/-ring (- (len otel/-ring) otel/-cap)))
|
||
nil)
|
||
span)))
|
||
|
||
(define otel/recent (fn () otel/-ring))
|
||
|
||
;; Clear the ring AND the parent stack — used between tests / requests.
|
||
(define otel/reset!
|
||
(fn ()
|
||
(begin
|
||
(set! otel/-ring (list))
|
||
(set! otel/-stack (list))
|
||
nil)))
|
||
|
||
;; ── -timed: the shared timed-effect core ─────────────────────────────
|
||
;; Records a span around (thunk): a fresh span id, the trace inherited from the
|
||
;; enclosing span (or a new trace at the root), the enclosing span as :parent, and
|
||
;; t0/t1 straddling the call. Pushes its context so nested spans see it as parent,
|
||
;; pops after, then records the finished span. `finalize` maps the thunk's result
|
||
;; to EXTRA attrs merged onto `attrs` (e.g. an http.status only known post-call).
|
||
;; Returns the thunk's value unchanged.
|
||
(define otel/-timed
|
||
(fn (name attrs finalize thunk)
|
||
(let ((parent-ctx (otel/-top)))
|
||
(let ((trace (if parent-ctx (get parent-ctx :trace) (otel/gen-trace-id)))
|
||
(span (otel/gen-span-id))
|
||
(parent (if parent-ctx (get parent-ctx :span) nil)))
|
||
(let ((t0 (begin (otel/-push! {:span span :trace trace}) (otel/now-ns))))
|
||
;; guard the body: on a raise, pop + record an ERROR span (status +
|
||
;; exception event) as a SIDE EFFECT in a clause test that returns false,
|
||
;; so the condition auto-propagates to the outer handler. (An explicit
|
||
;; `(raise e)` inside a guard handler re-enters this guard and hangs.)
|
||
(guard (e ((begin
|
||
(let ((t1 (otel/now-ns)))
|
||
(begin
|
||
(otel/-pop!)
|
||
(otel/record!
|
||
{:trace trace :span span :parent parent :name name
|
||
:t0 t0 :t1 t1 :status "error" :attrs attrs
|
||
:events (list {:name "exception" :time t1 :message (str e)})})))
|
||
false)
|
||
nil))
|
||
(let ((result (thunk)))
|
||
(let ((t1 (otel/now-ns)))
|
||
(begin
|
||
(otel/-pop!)
|
||
(otel/record!
|
||
{:trace trace :span span :parent parent :name name
|
||
:t0 t0 :t1 t1 :status "ok"
|
||
:attrs (merge attrs (finalize result))
|
||
:events (list)})
|
||
result)))))))))
|
||
|
||
;; with-span — a plain timed span; no result-derived attrs.
|
||
(define otel/with-span
|
||
(fn (name attrs thunk)
|
||
(otel/-timed name attrs (fn (r) {}) thunk)))
|
||
|
||
;; ── auto-instrument HTTP handlers ─────────────────────────────────────
|
||
;; A trace is a composition; a request handler is a timed effect. Wrap one Dream
|
||
;; route's handler so dispatching it records a root span "METHOD /route" with
|
||
;; {:http.method :http.route :http.status}. The status is only known after the
|
||
;; handler runs, so it's a finalize attr; the result is coerced the same way the
|
||
;; router coerces it, so a bare-string handler still reports 200. The raw handler
|
||
;; result is returned untouched so dr/run-route's own coercion is unchanged.
|
||
(define otel/instrument-route
|
||
(fn (r)
|
||
(let ((method (dream-route-method r))
|
||
(path (dream-route-path r))
|
||
(handler (dream-route-handler r)))
|
||
(dream-route method path
|
||
(fn (req)
|
||
;; :http.route is the PATTERN (groups metrics: all posts under /:slug);
|
||
;; :http.target is the ACTUAL path this request hit (e.g. /welcome), so
|
||
;; the trace view can show which concrete resource was served.
|
||
(otel/-timed
|
||
(str method " " path)
|
||
{:http.method method :http.route path :http.target (dream-path req)}
|
||
(fn (resp) {:http.status (dream-status (dream-coerce-response resp))})
|
||
(fn () (handler req))))))))
|
||
|
||
;; Flatten nested route groups and instrument each route — the host/make-app seam.
|
||
(define otel/instrument-routes
|
||
(fn (routes) (map otel/instrument-route (dr/flatten-routes routes))))
|
||
|
||
;; ── P4: render-fold → SVG waterfall ───────────────────────────────────
|
||
;; A trace is a tree of spans; a waterfall is a fold over that tree into SVG (the
|
||
;; same discipline as the compose render-fold: walk the structure, emit per node).
|
||
;; Layout: x ∝ (t0 − trace.t0), width ∝ duration, y ∝ depth (ancestor count via
|
||
;; parent links). Geometry is computed as plain rect data first (otel/waterfall-
|
||
;; rects, testable), then folded into <svg> markup (otel/waterfall).
|
||
(define otel/-row-h 18)
|
||
(define otel/-svg-w 640)
|
||
(define otel/-pad 4)
|
||
|
||
;; all spans belonging to one trace, from the ring.
|
||
(define otel/trace-spans
|
||
(fn (trace-id) (filter (fn (s) (= (get s :trace) trace-id)) (otel/recent))))
|
||
|
||
(define otel/-span-by-id
|
||
(fn (spans id) (first (filter (fn (s) (= (get s :span) id)) spans))))
|
||
|
||
;; depth = number of ancestors reachable via :parent within this trace's spans.
|
||
(define otel/-depth
|
||
(fn (s spans)
|
||
(if (nil? (get s :parent))
|
||
0
|
||
(let ((p (otel/-span-by-id spans (get s :parent))))
|
||
(if (nil? p) 0 (+ 1 (otel/-depth p spans)))))))
|
||
|
||
;; trace start = earliest t0; end = latest t1 (folds over the spans).
|
||
(define otel/-min-t0
|
||
(fn (spans) (reduce (fn (m s) (min m (get s :t0))) (get (first spans) :t0) spans)))
|
||
(define otel/-max-t1
|
||
(fn (spans) (reduce (fn (m s) (max m (get s :t1))) (get (first spans) :t1) spans)))
|
||
|
||
;; a rect per span: {:span :name :depth :x :y :w}. x/width are scaled to the
|
||
;; drawable width; a zero-duration span still gets a 1px sliver so it's visible.
|
||
(define otel/waterfall-rects
|
||
(fn (trace-id)
|
||
(let ((spans (otel/trace-spans trace-id)))
|
||
(if (empty? spans)
|
||
(list)
|
||
(let ((t0 (otel/-min-t0 spans))
|
||
(t1 (otel/-max-t1 spans))
|
||
(iw (- otel/-svg-w (* 2 otel/-pad))))
|
||
(let ((total (if (> (- t1 t0) 0) (- t1 t0) 1)))
|
||
(map
|
||
(fn (s)
|
||
(let ((d (otel/-depth s spans))
|
||
(dur (- (get s :t1) (get s :t0))))
|
||
(let ((raw-w (* (/ dur total) iw)))
|
||
{:span (get s :span)
|
||
:name (get s :name)
|
||
:depth d
|
||
:dur dur
|
||
:status (get s :status)
|
||
:x (+ otel/-pad (* (/ (- (get s :t0) t0) total) iw))
|
||
:y (+ otel/-pad (* d otel/-row-h))
|
||
:w (if (> raw-w 1) raw-w 1)})))
|
||
spans)))))))
|
||
|
||
;; ns → a compact "Nms" / "Nus" label.
|
||
(define otel/-ms
|
||
(fn (ns)
|
||
(if (>= ns 1000000)
|
||
(str (quotient ns 1000000) "ms")
|
||
(str (quotient ns 1000) "us"))))
|
||
|
||
;; error spans render red, everything else teal.
|
||
(define otel/-bar-fill
|
||
(fn (status) (if (= status "error") "#e45756" "#4c9a8f")))
|
||
|
||
;; one <g> (bar + name + duration) per rect. The bar is the only <rect>, so
|
||
;; count(rect) == count(span); axis chrome uses <line>/<text>. A leading <title>
|
||
;; gives a native hover tooltip with the full span name (the inline label clips on
|
||
;; narrow bars).
|
||
(define otel/-rect->g
|
||
(fn (r)
|
||
(quasiquote
|
||
(g
|
||
(title (unquote (str (get r :name) " · " (otel/-ms (get r :dur))
|
||
(if (= (get r :status) "error") " · error" ""))))
|
||
(rect :x (unquote (get r :x)) :y (unquote (get r :y))
|
||
:width (unquote (get r :w)) :height (unquote (- otel/-row-h 2))
|
||
:fill (unquote (otel/-bar-fill (get r :status))) :rx 2)
|
||
(text :x (unquote (+ (get r :x) 3)) :y (unquote (+ (get r :y) 12))
|
||
:font-size 10 :fill "#ffffff"
|
||
(unquote (str (get r :name) " " (otel/-ms (get r :dur)))))))))
|
||
|
||
;; max depth across rects (0 when empty) → svg height.
|
||
(define otel/-max-depth
|
||
(fn (rects) (reduce (fn (m r) (max m (get r :depth))) 0 rects)))
|
||
|
||
;; (0 1 … n-1)
|
||
(define otel/-range
|
||
(fn (n) (if (<= n 0) (list) (append (otel/-range (- n 1)) (list (- n 1))))))
|
||
|
||
;; a trace's wall span (ns): latest end − earliest start.
|
||
(define otel/-trace-dur
|
||
(fn (trace-id)
|
||
(let ((spans (otel/trace-spans trace-id)))
|
||
(if (empty? spans) 0 (- (otel/-max-t1 spans) (otel/-min-t0 spans))))))
|
||
|
||
;; a time ruler across the top: N vertical gridlines spanning the body, each with a
|
||
;; "<t>ms" label showing the offset from the trace start. Chrome is <line>/<text> so
|
||
;; the per-span <rect> count is unaffected.
|
||
(define otel/-ruler
|
||
(fn (dur ruler-h total-h)
|
||
(let ((inner (- otel/-svg-w (* 2 otel/-pad)))
|
||
(n 6))
|
||
(quasiquote
|
||
(g
|
||
(line :x1 (unquote otel/-pad) :y1 (unquote (- ruler-h 3))
|
||
:x2 (unquote (- otel/-svg-w otel/-pad)) :y2 (unquote (- ruler-h 3))
|
||
:stroke "#ccc" :stroke-width 1)
|
||
(splice-unquote
|
||
(map
|
||
(fn (i)
|
||
(let ((x (+ otel/-pad (quotient (* inner i) (- n 1))))
|
||
(ms (quotient (* dur i) (* (- n 1) 1000000))))
|
||
(quasiquote
|
||
(g
|
||
(line :x1 (unquote x) :y1 (unquote (- ruler-h 3)) :x2 (unquote x)
|
||
:y2 (unquote total-h) :stroke "#eee" :stroke-width 1)
|
||
(text :x (unquote (+ x 2)) :y 10 :font-size 9 :fill "#999"
|
||
(unquote (str ms "ms")))))))
|
||
(otel/-range n))))))))
|
||
|
||
;; the trace as an inline <svg> waterfall, with a time ruler above the bars.
|
||
(define otel/waterfall
|
||
(fn (trace-id)
|
||
(let ((rects (otel/waterfall-rects trace-id))
|
||
(ruler-h 16))
|
||
(let ((body-h (* (+ (otel/-max-depth rects) 1) otel/-row-h)))
|
||
(let ((h (+ (* 2 otel/-pad) ruler-h body-h)))
|
||
(quasiquote
|
||
(svg :width (unquote otel/-svg-w) :height (unquote h)
|
||
:xmlns "http://www.w3.org/2000/svg" :font-family "monospace"
|
||
(unquote (otel/-ruler (otel/-trace-dur trace-id) ruler-h h))
|
||
(g :transform (unquote (str "translate(0," (+ otel/-pad ruler-h) ")"))
|
||
(splice-unquote (map otel/-rect->g rects))))))))))
|
||
|
||
;; count nodes whose head symbol prints as `head` — a small SVG-tree assertion aid.
|
||
(define otel/-tree-count
|
||
(fn (tree head)
|
||
(if (list? tree)
|
||
(let ((self (if (and (not (empty? tree)) (= (str (first tree)) head)) 1 0)))
|
||
(reduce (fn (acc n) (+ acc (otel/-tree-count n head))) self tree))
|
||
0)))
|
||
|
||
;; ── P5: metrics (aggregate-fold) ──────────────────────────────────────
|
||
;; Fold the recent spans into per-route request counts + a latency histogram
|
||
;; (p50/p95/p99 from durations). No `sort` primitive here, so percentiles ride a
|
||
;; tiny insertion sort; nearest-rank keeps the maths exact for the tests.
|
||
(define otel/-insert
|
||
(fn (x sorted)
|
||
(if (empty? sorted)
|
||
(list x)
|
||
(if (<= x (first sorted))
|
||
(cons x sorted)
|
||
(cons (first sorted) (otel/-insert x (rest sorted)))))))
|
||
(define otel/-sort-nums
|
||
(fn (lst) (reduce (fn (acc x) (otel/-insert x acc)) (list) lst)))
|
||
|
||
;; nearest-rank percentile of an ASCENDING list: rank = ceil(p/100 · N), 1-based.
|
||
(define otel/-percentile
|
||
(fn (sorted p)
|
||
(if (empty? sorted)
|
||
0
|
||
(let ((n (len sorted)))
|
||
(let ((idx (- (ceil (* (/ p 100) n)) 1)))
|
||
(let ((i (if (< idx 0) 0 (if (>= idx n) (- n 1) idx))))
|
||
(nth sorted i)))))))
|
||
|
||
;; a span's route label: the http.route attr, else the span name.
|
||
(define otel/-span-route
|
||
(fn (s) (or (get (get s :attrs) :http.route) (get s :name))))
|
||
|
||
(define otel/-span-dur (fn (s) (- (get s :t1) (get s :t0))))
|
||
|
||
;; distinct values, order-preserving.
|
||
(define otel/-distinct
|
||
(fn (lst)
|
||
(reduce
|
||
(fn (acc x) (if (some (fn (y) (= y x)) acc) acc (append acc (list x))))
|
||
(list)
|
||
lst)))
|
||
|
||
;; the aggregate for one route: count + latency percentiles over its durations.
|
||
(define otel/route-metrics
|
||
(fn (spans route)
|
||
(let ((rs (filter (fn (s) (= (otel/-span-route s) route)) spans)))
|
||
(let ((durs (otel/-sort-nums (map otel/-span-dur rs))))
|
||
{:route route
|
||
:count (len rs)
|
||
:p50 (otel/-percentile durs 50)
|
||
:p95 (otel/-percentile durs 95)
|
||
:p99 (otel/-percentile durs 99)}))))
|
||
|
||
;; fold spans → {:total-requests N :routes (per-route metrics …)}.
|
||
(define otel/metrics
|
||
(fn (spans)
|
||
(let ((routes (otel/-distinct (map otel/-span-route spans))))
|
||
{:total-requests (len spans)
|
||
:routes (map (fn (r) (otel/route-metrics spans r)) routes)})))
|
||
|
||
;; convenience: metrics over the current ring.
|
||
(define otel/metrics-recent (fn () (otel/metrics (otel/recent))))
|
||
|
||
;; ── P6: live dashboard (SSR + SSE) ────────────────────────────────────
|
||
;; GET /otel renders a dashboard — metrics strip + the latest trace's waterfall +
|
||
;; a recent-traces list — as server-rendered HTML carrying Datastar-style reactive
|
||
;; attributes that subscribe to GET /otel/stream, the SSE feed of new span events
|
||
;; (SXTP events, the host's Datastar-borrowed wire format). SSR + declarative
|
||
;; reactive attrs + SSE patches IS the reactive-island model here. (Live client
|
||
;; hydration is a deploy concern; SSR, the event feed, and the data are tested.)
|
||
|
||
;; recent traces, newest-first: {:trace :name :spans}. root name = the parentless
|
||
;; span (fallback: first recorded).
|
||
(define otel/-trace-ids
|
||
(fn () (otel/-distinct (map (fn (s) (get s :trace)) (otel/recent)))))
|
||
(define otel/-trace-root-name
|
||
(fn (trace-id)
|
||
(let ((spans (otel/trace-spans trace-id)))
|
||
(let ((roots (filter (fn (s) (nil? (get s :parent))) spans)))
|
||
(cond
|
||
((not (empty? roots)) (get (first roots) :name))
|
||
((not (empty? spans)) (get (first spans) :name))
|
||
(else ""))))))
|
||
;; the actual target path of a trace's root request (nil if not an http trace).
|
||
(define otel/-trace-target
|
||
(fn (trace-id)
|
||
(let ((roots (filter (fn (s) (nil? (get s :parent))) (otel/trace-spans trace-id))))
|
||
(if (empty? roots) nil (get (get (first roots) :attrs) :http.target)))))
|
||
;; when the trace last did something (latest span end), epoch ns.
|
||
(define otel/-trace-end
|
||
(fn (trace-id)
|
||
(let ((spans (otel/trace-spans trace-id)))
|
||
(if (empty? spans) 0 (otel/-max-t1 spans)))))
|
||
;; a ns age → a coarse "N<unit> ago" label.
|
||
(define otel/-ago
|
||
(fn (age-ns)
|
||
(let ((s (quotient (if (< age-ns 0) 0 age-ns) 1000000000)))
|
||
(cond
|
||
((< s 1) "just now")
|
||
((< s 60) (str s "s ago"))
|
||
((< s 3600) (str (quotient s 60) "m ago"))
|
||
(else (str (quotient s 3600) "h ago"))))))
|
||
(define otel/trace-summary
|
||
(fn (trace-id)
|
||
{:trace trace-id
|
||
:name (otel/-trace-root-name trace-id)
|
||
:target (otel/-trace-target trace-id)
|
||
:dur (otel/-trace-dur trace-id)
|
||
:t (otel/-trace-end trace-id)
|
||
:spans (len (otel/trace-spans trace-id))}))
|
||
(define otel/recent-traces
|
||
(fn () (reverse (map otel/trace-summary (otel/-trace-ids)))))
|
||
;; the dashboard's own endpoints — excluded from the "latest trace" so the
|
||
;; waterfall shows a real request, not the 3s poll.
|
||
(define otel/-self-route?
|
||
(fn (route)
|
||
(or (= route "/otel") (= route "/otel/fragment") (= route "/otel/stream"))))
|
||
(define otel/-trace-route
|
||
(fn (trace-id)
|
||
(let ((roots (filter (fn (s) (nil? (get s :parent))) (otel/trace-spans trace-id))))
|
||
(if (empty? roots) nil (get (get (first roots) :attrs) :http.route)))))
|
||
;; which trace the waterfall shows: prefer the newest MULTI-span trace (an
|
||
;; instrumented page render — the interesting one), else the newest non-self trace
|
||
;; (skip the poll), else the newest trace. Single-span assets/polls lose to a real
|
||
;; page render even when they're more recent.
|
||
(define otel/latest-trace
|
||
(fn ()
|
||
(let ((newest-first (reverse (otel/-trace-ids))))
|
||
(let ((multi (filter (fn (tid) (> (len (otel/trace-spans tid)) 1)) newest-first))
|
||
(real (filter (fn (tid) (not (otel/-self-route? (otel/-trace-route tid)))) newest-first)))
|
||
(cond
|
||
((not (empty? multi)) (first multi))
|
||
((not (empty? real)) (first real))
|
||
((not (empty? newest-first)) (first newest-first))
|
||
(else nil))))))
|
||
|
||
;; ── SSE span events (SXTP) ────────────────────────────────────────────
|
||
(define otel/span-event
|
||
(fn (s)
|
||
(sxtp/event "otel.span"
|
||
{:id (get s :span)
|
||
:body {:trace (get s :trace) :span (get s :span) :parent (get s :parent)
|
||
:name (get s :name) :t0 (get s :t0) :t1 (get s :t1)
|
||
:attrs (get s :attrs)}
|
||
:time (get s :t1)})))
|
||
(define otel/latest-span-event
|
||
(fn ()
|
||
(let ((r (otel/recent)))
|
||
(if (empty? r) nil (otel/span-event (last r))))))
|
||
;; one SSE frame: `event: otel.span\n data: <sxtp event>\n\n`; "" when no spans.
|
||
(define otel/-stream-body
|
||
(fn ()
|
||
(let ((e (otel/latest-span-event)))
|
||
(if (nil? e)
|
||
""
|
||
(str "event: otel.span\ndata: " (sxtp/serialize e) "\n\n")))))
|
||
|
||
;; ── dashboard markup (plain HTML tags → render-to-html SSRs cleanly) ──
|
||
(define otel/-metrics-strip
|
||
(fn (m)
|
||
(quasiquote
|
||
(table :class "otel-metrics"
|
||
(tr (th "route") (th "count") (th "p50") (th "p95") (th "p99"))
|
||
(splice-unquote
|
||
(map
|
||
(fn (r)
|
||
(quasiquote
|
||
(tr (td (unquote (str (get r :route))))
|
||
(td (unquote (str (get r :count))))
|
||
(td (unquote (str (get r :p50))))
|
||
(td (unquote (str (get r :p95))))
|
||
(td (unquote (str (get r :p99)))))))
|
||
(get m :routes)))))))
|
||
|
||
(define otel/-traces-list
|
||
(fn (traces)
|
||
;; capture "now" ONCE so all ages are relative to the same instant; each 3s
|
||
;; re-render recomputes it, so the labels tick upward (2s ago → 5s ago → …).
|
||
(let ((now (otel/now-ns)))
|
||
(quasiquote
|
||
(ul :class "otel-traces"
|
||
(splice-unquote
|
||
(map
|
||
(fn (t)
|
||
(quasiquote
|
||
(li :data-trace (unquote (str (get t :trace)))
|
||
;; concrete target (/welcome) if known, else the span name; then
|
||
;; wall duration, span count, and how long ago it happened.
|
||
(span :style "font-weight:600"
|
||
(unquote (or (get t :target) (get t :name))))
|
||
(unquote (str " · " (otel/-ms (get t :dur))
|
||
" · " (get t :spans) " spans"
|
||
" · " (otel/-ago (- now (get t :t))))))))
|
||
traces)))))))
|
||
|
||
;; a per-route latency bar chart: each row is a nested bar — teal p50 inside amber
|
||
;; p95 inside light-red p99 — so the tail (red beyond amber) is visible at a glance.
|
||
(define otel/-latency-chart
|
||
(fn (m)
|
||
(let ((routes (get m :routes)))
|
||
(if (empty? routes)
|
||
(quasiquote (p :class "otel-empty" "no metrics yet"))
|
||
(let ((maxp99 (reduce (fn (mx r) (max mx (get r :p99))) 1 routes))
|
||
(label-w 150)
|
||
(bar-w 340)
|
||
(row-h 24))
|
||
(quasiquote
|
||
(svg :width 560 :height (unquote (+ (* (len routes) row-h) 6))
|
||
:xmlns "http://www.w3.org/2000/svg" :font-family "monospace"
|
||
(splice-unquote
|
||
(map-indexed
|
||
(fn (i r)
|
||
(let ((y (* i row-h)))
|
||
(let ((p99w (max 1 (* (/ (get r :p99) maxp99) bar-w)))
|
||
(p95w (max 1 (* (/ (get r :p95) maxp99) bar-w)))
|
||
(p50w (max 1 (* (/ (get r :p50) maxp99) bar-w))))
|
||
(quasiquote
|
||
(g
|
||
(text :x 0 :y (unquote (+ y 15)) :font-size 11 :fill "#333"
|
||
(unquote (get r :route)))
|
||
(rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p99w)
|
||
:height 14 :fill "#f2c2c2" :rx 3)
|
||
(rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p95w)
|
||
:height 14 :fill "#f2a63b" :rx 3)
|
||
(rect :x (unquote label-w) :y (unquote (+ y 5)) :width (unquote p50w)
|
||
:height 14 :fill "#4c9a8f" :rx 3)
|
||
(text :x (unquote (+ label-w p99w 5)) :y (unquote (+ y 15))
|
||
:font-size 9 :fill "#999"
|
||
(unquote (str "p99 " (otel/-ms (get r :p99))))))))))
|
||
routes)))))))))
|
||
|
||
;; the refreshing content — everything that changes as spans arrive. Served on its
|
||
;; own at GET /otel/fragment; the poll swaps THIS into #otel-body.
|
||
(define otel/-dashboard-body
|
||
(fn ()
|
||
(let ((m (otel/metrics (otel/recent)))
|
||
(lt (otel/latest-trace))
|
||
(traces (otel/recent-traces)))
|
||
(quasiquote
|
||
(div
|
||
(h2 "latency by route")
|
||
(p :style "font-size:0.75em;opacity:0.7"
|
||
(span :style "color:#4c9a8f" "▉ p50") " "
|
||
(span :style "color:#f2a63b" "▉ p95") " "
|
||
(span :style "color:#f2c2c2" "▉ p99 (tail)"))
|
||
(unquote (otel/-latency-chart m))
|
||
(h2 "metrics")
|
||
(unquote (otel/-metrics-strip m))
|
||
(h2 "latest trace")
|
||
(unquote
|
||
(if (nil? lt)
|
||
(quasiquote (p :class "otel-empty" "no traces yet"))
|
||
(otel/waterfall lt)))
|
||
(h2 "recent traces")
|
||
(unquote (otel/-traces-list traces)))))))
|
||
|
||
(define otel/dashboard
|
||
(fn ()
|
||
(quasiquote
|
||
(div :id "otel-dashboard"
|
||
(h1 "OpenTelemetry")
|
||
(p :style "font-size:0.8em;opacity:0.7" "live · refreshes every 3s in-app")
|
||
;; HIDDEN POLLER: a sibling that is never itself a swap target, so the swap's
|
||
;; dispose pass never touches it and its poll interval keeps firing. It GETs
|
||
;; /otel/fragment every 3s and swaps the SIBLING #otel-body's inner content.
|
||
;; (Polling the body element itself died after ~2 ticks — the innerHTML swap's
|
||
;; dispose-islands-in disposed the poll's own callback.)
|
||
(span :sx-get "/otel/fragment" :sx-trigger "every 3s" :sx-target "#otel-body"
|
||
:sx-swap "innerHTML" :style "display:none")
|
||
(div :id "otel-body" (unquote (otel/-dashboard-body)))))))
|
||
|
||
;; ── routes ────────────────────────────────────────────────────────────
|
||
;; Dual-mode, wired into the SPA: a boosted (SX-Request) fetch — a link click OR
|
||
;; the 3s poll — gets the dashboard as a text/sx fragment the WASM kernel renders
|
||
;; into place; a direct/no-JS load gets the full SPA shell (host/blog--page) with
|
||
;; the dashboard in #content, so it degrades to a plain server-rendered page.
|
||
;; (Reuses the host SPA shell + content-type negotiation from lib/host/blog.sx.)
|
||
(define otel/dashboard-route
|
||
(dream-get "/otel"
|
||
(fn (req)
|
||
(host/blog--resp req 200
|
||
(host/blog--page req "OpenTelemetry" (otel/dashboard))))))
|
||
;; the poll target: always the refreshing body as a text/sx fragment (the WASM
|
||
;; kernel swaps it into #otel-body). Two segments, so /:slug can't shadow it.
|
||
(define otel/dashboard-fragment-route
|
||
(dream-get "/otel/fragment"
|
||
(fn (req)
|
||
(dream-response 200 {:content-type "text/sx; charset=utf-8"}
|
||
(serialize (otel/-dashboard-body))))))
|
||
(define otel/stream-route
|
||
(dream-get "/otel/stream"
|
||
(fn (req)
|
||
(dream-response 200 {:content-type "text/event-stream"} (otel/-stream-body)))))
|
||
(define otel/routes (list otel/dashboard-route otel/dashboard-fragment-route otel/stream-route))
|
||
|
||
;; ── P7: OTLP-JSON export ──────────────────────────────────────────────
|
||
;; Serialize spans to the OTLP/JSON schema (resourceSpans → scopeSpans → spans)
|
||
;; for interop with Jaeger/Grafana. traceId/spanId/parentSpanId are hex strings;
|
||
;; timestamps are uint64-as-string; attributes are typed {key,value}. Export is a
|
||
;; fold (span → OTLP span); POST goes through an INJECTED transport so it's
|
||
;; testable without a live collector.
|
||
|
||
;; our "trace-N"/"span-N" ids → a fixed-width lowercase-hex id (real OTLP widths:
|
||
;; traceId 16 bytes = 32 hex, spanId 8 bytes = 16 hex). Deterministic from the id.
|
||
(define otel/-id-num
|
||
(fn (id)
|
||
(let ((n (string->number (last (split id "-")))))
|
||
(if (nil? n) 0 n))))
|
||
(define otel/-zeros
|
||
(fn (k) (if (<= k 0) "" (str "0" (otel/-zeros (- k 1))))))
|
||
(define otel/-pad-hex
|
||
(fn (n width)
|
||
(let ((h (number->string n 16)))
|
||
(str (otel/-zeros (- width (len h))) h))))
|
||
(define otel/-trace-hex (fn (id) (otel/-pad-hex (otel/-id-num id) 32)))
|
||
(define otel/-span-hex (fn (id) (otel/-pad-hex (otel/-id-num id) 16)))
|
||
|
||
;; a value → OTLP AnyValue: numbers are intValue (uint64-as-string), else stringValue.
|
||
(define otel/-otlp-value
|
||
(fn (v)
|
||
(if (= (type-of v) "number")
|
||
{:intValue (str v)}
|
||
{:stringValue (str v)})))
|
||
(define otel/-otlp-attrs
|
||
(fn (attrs)
|
||
(map (fn (k) {:key (str k) :value (otel/-otlp-value (get attrs k))}) (keys attrs))))
|
||
|
||
;; span kind: 2 = SERVER for an instrumented request (has http.method), else
|
||
;; 1 = INTERNAL.
|
||
(define otel/-otlp-kind
|
||
(fn (attrs) (if (get attrs :http.method) 2 1)))
|
||
|
||
(define otel/-otlp-span
|
||
(fn (s)
|
||
(let ((attrs (or (get s :attrs) {})))
|
||
(let ((base
|
||
{:traceId (otel/-trace-hex (get s :trace))
|
||
:spanId (otel/-span-hex (get s :span))
|
||
:name (get s :name)
|
||
:kind (otel/-otlp-kind attrs)
|
||
:startTimeUnixNano (str (get s :t0))
|
||
:endTimeUnixNano (str (get s :t1))
|
||
:attributes (otel/-otlp-attrs attrs)}))
|
||
(if (nil? (get s :parent))
|
||
base
|
||
(assoc base :parentSpanId (otel/-span-hex (get s :parent))))))))
|
||
|
||
;; spans → the OTLP export envelope (a JSON-shaped SX value).
|
||
(define otel/export-otlp
|
||
(fn (spans)
|
||
{:resourceSpans
|
||
(list
|
||
{:resource
|
||
{:attributes (list {:key "service.name"
|
||
:value {:stringValue "rose-ash-host"}})}
|
||
:scopeSpans
|
||
(list
|
||
{:scope {:name "otel-on-sx" :version "0.1.0"}
|
||
:spans (map otel/-otlp-span spans)})})}))
|
||
|
||
(define otel/export-otlp-json
|
||
(fn (spans) (dream-json-encode (otel/export-otlp spans))))
|
||
|
||
;; POST the OTLP payload through an injected transport. `transport` is a fn taking
|
||
;; a request dict {:method :url :headers :body} — real deploys pass an http POST;
|
||
;; tests pass a recorder. Returns whatever the transport returns.
|
||
(define otel/post-otlp
|
||
(fn (endpoint spans transport)
|
||
(transport
|
||
{:method "POST"
|
||
:url endpoint
|
||
:headers {:content-type "application/json"}
|
||
:body (otel/export-otlp-json spans)})))
|
||
|
||
;; ── P8: W3C traceparent context propagation ───────────────────────────
|
||
;; traceparent = "<version>-<trace-id>-<parent-id>-<flags>" (RFC: version 00,
|
||
;; trace-id 32 hex, parent-id 16 hex, flags 2 hex; low bit = sampled). Emitting it
|
||
;; on outbound inter-service calls and parsing it on inbound requests lets ONE
|
||
;; trace span multiple services. Our internal ids hex-encode via the OTLP helpers.
|
||
(define otel/format-traceparent
|
||
(fn (trace-id span-id)
|
||
(str "00-" (otel/-trace-hex trace-id) "-" (otel/-span-hex span-id) "-01")))
|
||
|
||
;; the traceparent for the CURRENT span (nil outside any span) — for outbound calls.
|
||
(define otel/current-traceparent
|
||
(fn ()
|
||
(let ((t (otel/-top)))
|
||
(if (nil? t) nil (otel/format-traceparent (get t :trace) (get t :span))))))
|
||
|
||
;; parse an inbound traceparent → {:version :trace-id :parent-id :flags :sampled};
|
||
;; nil if absent or malformed (wrong field count / hex widths).
|
||
(define otel/parse-traceparent
|
||
(fn (header)
|
||
(if (or (nil? header) (= header ""))
|
||
nil
|
||
(let ((parts (split header "-")))
|
||
(if (= (len parts) 4)
|
||
(let ((ver (nth parts 0)) (tid (nth parts 1))
|
||
(pid (nth parts 2)) (flags (nth parts 3)))
|
||
(if (and (= (len tid) 32) (= (len pid) 16))
|
||
{:version ver :trace-id tid :parent-id pid :flags flags
|
||
:sampled (not (= flags "00"))}
|
||
nil))
|
||
nil)))))
|