Step 17b: Pretext — DOM-free text layout with otfm font measurement

Pure SX text layout library with one IO boundary (text-measure perform).
Knuth-Plass optimal line breaking, Liang's hyphenation, position calculation.

Library (lib/text-layout.sx):
- break-lines: Knuth-Plass DP over word widths
- break-lines-greedy: simple word-wrap for comparison
- hyphenate-word: Liang's trie algorithm
- position-line/position-lines: running x/y sums
- measure-text: single perform (text-measure IO)

Server font measurement (otfm):
- Reads OpenType cmap + hmtx tables from .ttf files
- DejaVu Serif/Sans bundled in shared/static/fonts/
- _cek_io_resolver hook: perform works inside aser/eval_expr
- JIT VM suspension inline resolution for IO in compiled code

~font component (shared/sx/templates/font.sx):
- Works like ~tw: emits @font-face CSS via cssx scope
- Sets font-family on parent via spread
- Deduplicates font declarations

Infrastructure fixes:
- stdin load command: per-expression error handling (was aborting on first error)
- cek_run IO hook: _cek_io_resolver in sx_types.ml
- JIT VmSuspended: inline IO resolution when resolver installed
- ListRef handling in IO resolver (perform creates ListRef, not List)

Demo page at /sx/(applications.(pretext)):
- Hero: justified paragraph with otfm-measured proportional widths
- Greedy vs Knuth-Plass side-by-side comparison
- Badness scoring visualization
- Hyphenation syllable decomposition

25 new tests (spec/tests/test-text-layout.sx), 3201/3201 passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-12 15:13:00 +00:00
parent f60d22e86e
commit 1eadefd0c1
9 changed files with 274 additions and 52 deletions

View File

@@ -1,6 +1,6 @@
(executables
(names run_tests debug_set sx_server integration_tests)
(libraries sx unix threads.posix))
(libraries sx unix threads.posix otfm))
(executable
(name mcp_tree)

View File

@@ -1422,6 +1422,7 @@ let run_spec_tests env test_files =
let templates_dir = Filename.concat project_dir "shared/sx/templates" in
load_module "tw.sx" templates_dir;
load_module "tw-layout.sx" templates_dir;
load_module "font.sx" templates_dir;
load_module "tw-type.sx" templates_dir;
(* SX docs site: components, handlers, demos *)
let sx_comp_dir = Filename.concat project_dir "sx/sxc" in

View File

@@ -18,6 +18,123 @@
open Sx_types
(* ====================================================================== *)
(* Font measurement via otfm — reads OpenType/TrueType font tables *)
(* ====================================================================== *)
(* Font cache: font-family → (units_per_em, glyph_id_map, advance_width_map) *)
let _font_cache : (string, (int * (int, int) Hashtbl.t * (int, int) Hashtbl.t)) Hashtbl.t = Hashtbl.create 4
(* Map font-family names to file paths *)
let _font_base = ref "shared/static/fonts"
let font_path_for family =
let base = !_font_base in
let try_paths paths =
List.find_opt Sys.file_exists paths in
match String.lowercase_ascii family with
| "serif" | "times" | "times new roman" ->
try_paths [base ^ "/DejaVuSerif.ttf";
"/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf"]
| "sans-serif" | "sans" | "arial" | "helvetica" ->
try_paths [base ^ "/DejaVuSans.ttf";
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"]
| "monospace" | "mono" | "courier" ->
try_paths [base ^ "/DejaVuSansMono-Bold.ttf";
"/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf"]
| path when Sys.file_exists path -> Some path
| _ -> try_paths [base ^ "/DejaVuSerif.ttf";
"/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf"]
(* Load a font file and extract cmap + hmtx tables *)
let load_font family =
match Hashtbl.find_opt _font_cache family with
| Some cached -> Some cached
| None ->
match font_path_for family with
| None -> None
| Some path ->
try
let ic = open_in_bin path in
let n = in_channel_length ic in
let buf = Bytes.create n in
really_input ic buf 0 n;
close_in ic;
let s = Bytes.to_string buf in
let d = Otfm.decoder (`String s) in
(* Get units_per_em from head table *)
let upm = match Otfm.head d with
| Error _ -> 2048
| Ok h -> h.Otfm.head_units_per_em in
(* Build char→glyph_id map from cmap.
cmap folds: (acc -> map_kind -> (u0,u1) -> glyph_id -> acc) *)
let cmap_tbl = Hashtbl.create 256 in
let _ = Otfm.cmap d (fun acc kind (u0, u1) gid ->
(match kind with
| `Glyph ->
(* Single mapping: u0 -> gid (u0 = u1) *)
Hashtbl.replace cmap_tbl u0 gid
| `Glyph_range ->
(* Range: u0..u1 -> gid..(gid + u1 - u0) *)
for i = 0 to u1 - u0 do
Hashtbl.replace cmap_tbl (u0 + i) (gid + i)
done);
acc) () in
(* Build glyph_id→advance_width map from hmtx.
hmtx folds: (acc -> glyph_id -> adv_width -> lsb -> acc) *)
let hmtx_tbl = Hashtbl.create 256 in
let _ = Otfm.hmtx d (fun acc gid adv _lsb ->
Hashtbl.replace hmtx_tbl gid adv; acc) () in
let result = (upm, cmap_tbl, hmtx_tbl) in
Printf.eprintf "[font] loaded %s: %d glyphs, %d metrics, upm=%d\n%!"
(Filename.basename path) (Hashtbl.length cmap_tbl) (Hashtbl.length hmtx_tbl) upm;
Hashtbl.replace _font_cache family result;
Some result
with e ->
Printf.eprintf "[font] error loading %s: %s\n%!" path (Printexc.to_string e);
None
(* Measure text width using font tables *)
let measure_text_otfm family size text =
match load_font family with
| None ->
(* Fallback to monospace approximation *)
let w = size *. 0.6 *. (float_of_int (String.length text)) in
(w, size, size *. 0.8, size *. 0.2)
| Some (upm, cmap_tbl, hmtx_tbl) ->
let scale = size /. (float_of_int upm) in
let width = ref 0.0 in
(* Iterate over UTF-8 codepoints *)
let i = ref 0 in
let len = String.length text in
while !i < len do
let byte = Char.code text.[!i] in
let cp, advance =
if byte < 0x80 then (byte, 1)
else if byte < 0xC0 then (byte, 1) (* continuation — skip *)
else if byte < 0xE0 then
((byte land 0x1F) lsl 6 lor (Char.code text.[min (!i+1) (len-1)] land 0x3F), 2)
else if byte < 0xF0 then
((byte land 0x0F) lsl 12
lor (Char.code text.[min (!i+1) (len-1)] land 0x3F) lsl 6
lor (Char.code text.[min (!i+2) (len-1)] land 0x3F), 3)
else
((byte land 0x07) lsl 18
lor (Char.code text.[min (!i+1) (len-1)] land 0x3F) lsl 12
lor (Char.code text.[min (!i+2) (len-1)] land 0x3F) lsl 6
lor (Char.code text.[min (!i+3) (len-1)] land 0x3F), 4)
in
let gid = match Hashtbl.find_opt cmap_tbl cp with
| Some g -> g | None -> 0 in
let adv = match Hashtbl.find_opt hmtx_tbl gid with
| Some a -> a | None -> upm / 2 in
width := !width +. (float_of_int adv) *. scale;
i := !i + advance
done;
let ascent = size *. 0.8 in (* approximate — could read OS/2 table *)
let descent = size *. 0.2 in
(!width, size, ascent, descent)
(* ====================================================================== *)
(* Output helpers *)
@@ -361,9 +478,9 @@ and cek_run_with_io state =
Nil
end
| "text-measure" ->
(* Resolve locally — monospace approximation *)
let args = let a = Sx_runtime.get_val request (String "args") in
(match a with List l -> l | _ -> [a]) in
let font = match args with String f :: _ -> f | _ -> "serif" in
let size = match args with
| [_font; Number sz; _text] -> sz
| [_font; Number sz] -> sz
@@ -371,13 +488,12 @@ and cek_run_with_io state =
let text = match args with
| [_font; _sz; String t] -> t
| _ -> "" in
let char_width = size *. 0.6 in
let width = char_width *. (float_of_int (String.length text)) in
let (w, h, asc, desc) = measure_text_otfm font size text in
let d = Hashtbl.create 4 in
Hashtbl.replace d "width" (Number width);
Hashtbl.replace d "height" (Number size);
Hashtbl.replace d "ascent" (Number (size *. 0.8));
Hashtbl.replace d "descent" (Number (size *. 0.2));
Hashtbl.replace d "width" (Number w);
Hashtbl.replace d "height" (Number h);
Hashtbl.replace d "ascent" (Number asc);
Hashtbl.replace d "descent" (Number desc);
Dict d
| _ ->
let args = let a = Sx_runtime.get_val request (String "args") in
@@ -967,7 +1083,16 @@ let register_jit_hook env =
(try Some (Sx_vm.call_closure cl args cl.vm_env_ref)
with
| Sx_vm.VmSuspended (request, saved_vm) ->
Some (make_vm_suspend_marker request saved_vm)
(* Try inline IO resolution; fall back to suspend marker *)
(match !Sx_types._cek_io_resolver with
| Some resolver ->
let rec resolve_loop req vm =
let result = resolver req (Nil) in
(try Some (Sx_vm.resume_vm vm result)
with Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2)
in
resolve_loop request saved_vm
| None -> Some (make_vm_suspend_marker request saved_vm))
| e ->
let fn_name = match l.l_name with Some n -> n | None -> "?" in
if not (Hashtbl.mem _jit_warned fn_name) then begin
@@ -992,7 +1117,15 @@ let register_jit_hook env =
(try Some (Sx_vm.call_closure cl args cl.vm_env_ref)
with
| Sx_vm.VmSuspended (request, saved_vm) ->
Some (make_vm_suspend_marker request saved_vm)
(match !Sx_types._cek_io_resolver with
| Some resolver ->
let rec resolve_loop req vm =
let result = resolver req (Nil) in
(try Some (Sx_vm.resume_vm vm result)
with Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2)
in
resolve_loop request saved_vm
| None -> Some (make_vm_suspend_marker request saved_vm))
| e ->
Printf.eprintf "[jit] %s first-call fallback to CEK: %s\n%!" fn_name (Printexc.to_string e);
Hashtbl.replace _jit_warned fn_name true;
@@ -1852,6 +1985,8 @@ let eval_with_io_render expr env =
| _ -> Nil in
let result = match op with
| "text-measure" ->
let font = match args with
| List (String f :: _) -> f | _ -> "serif" in
let size = match args with
| List [_font; Number sz; _text] -> sz
| List [_font; Number sz] -> sz
@@ -1859,13 +1994,12 @@ let eval_with_io_render expr env =
let text = match args with
| List [_font; _sz; String t] -> t
| _ -> "" in
let char_width = size *. 0.6 in
let width = char_width *. (float_of_int (String.length text)) in
let (w, h, asc, desc) = measure_text_otfm font size text in
let d = Hashtbl.create 4 in
Hashtbl.replace d "width" (Number width);
Hashtbl.replace d "height" (Number size);
Hashtbl.replace d "ascent" (Number (size *. 0.8));
Hashtbl.replace d "descent" (Number (size *. 0.2));
Hashtbl.replace d "width" (Number w);
Hashtbl.replace d "height" (Number h);
Hashtbl.replace d "ascent" (Number asc);
Hashtbl.replace d "descent" (Number desc);
Dict d
| "io-sleep" | "sleep" ->
let ms = match args with
@@ -2057,8 +2191,8 @@ let eval_with_io expr env =
Nil
with _ -> Nil)
| "text-measure" ->
(* Pretext: server-side text measurement (monospace approximation).
Real otfm font-table parsing can replace this later. *)
let font = match args with
| List (String f :: _) -> f | _ -> "serif" in
let size = match args with
| List [_font; Number sz; _text] -> sz
| List [_font; Number sz] -> sz
@@ -2066,15 +2200,12 @@ let eval_with_io expr env =
let text = match args with
| List [_font; _sz; String t] -> t
| _ -> "" in
let char_width = size *. 0.6 in
let width = char_width *. (float_of_int (String.length text)) in
let ascent = size *. 0.8 in
let descent = size *. 0.2 in
let (w, h, asc, desc) = measure_text_otfm font size text in
let d = Hashtbl.create 4 in
Hashtbl.replace d "width" (Number width);
Hashtbl.replace d "height" (Number size);
Hashtbl.replace d "ascent" (Number ascent);
Hashtbl.replace d "descent" (Number descent);
Hashtbl.replace d "width" (Number w);
Hashtbl.replace d "height" (Number h);
Hashtbl.replace d "ascent" (Number asc);
Hashtbl.replace d "descent" (Number desc);
Dict d
| _ ->
Printf.eprintf "[io] unhandled IO op: %s\n%!" op;
@@ -3148,6 +3279,7 @@ let http_mode port =
let dev_path = project_dir ^ "/shared/static" in
if Sys.file_exists docker_path then docker_path else dev_path in
Printf.eprintf "[sx-http] static_dir=%s\n%!" static_dir;
_font_base := static_dir ^ "/fonts";
(* HTTP mode always expands components — bind once, shared across domains *)
ignore (env_bind env "expand-components?" (NativeFn ("expand-components?", fun _args -> Bool true)));
(* Inject shell statics with real file hashes, CSS, and pages registry *)
@@ -3156,6 +3288,41 @@ let http_mode port =
The env_bind hook keeps it in sync with any future bindings. *)
(* Enable lazy JIT — compile lambdas to bytecode on first call *)
register_jit_hook env;
(* Install global IO resolver so perform works inside aser/eval_expr.
This lets components call measure-text during server-side rendering. *)
Sx_types._cek_io_resolver := Some (fun request _state ->
(* Extract op and args from either Dict {op, args} or raw List/ListRef (op-symbol args...) *)
let op, args = match request with
| Dict d ->
let o = match Hashtbl.find_opt d "op" with Some (String s) -> s | Some (Symbol s) -> s | _ -> "" in
let a = match Hashtbl.find_opt d "args" with Some v -> v | None -> Nil in
(o, a)
| List (Symbol op_sym :: rest) | ListRef { contents = Symbol op_sym :: rest } -> (op_sym, List rest)
| List (String op_str :: rest) | ListRef { contents = String op_str :: rest } -> (op_str, List rest)
| _ -> ("", Nil)
in
match op with
| "text-measure" ->
let font = match args with
| List (String f :: _) -> f
| _ -> "serif" in
let size = match args with
| List [_font; Number sz; _text] -> sz
| List [_font; Number sz] -> sz
| _ -> 16.0 in
let text = match args with
| List [_font; _sz; String t] -> t
| _ -> "" in
let (width, height, ascent, descent) = measure_text_otfm font size text in
let d = Hashtbl.create 4 in
Hashtbl.replace d "width" (Number width);
Hashtbl.replace d "height" (Number height);
Hashtbl.replace d "ascent" (Number ascent);
Hashtbl.replace d "descent" (Number descent);
Dict d
| "io-sleep" | "sleep" -> Nil
| "import" -> Nil
| _ -> Nil);
(* Response cache — path → full HTTP response string.
Populated during pre-warm, serves cached responses in <0.1ms.
Thread-safe: reads are lock-free (Hashtbl.find_opt is atomic for
@@ -3721,6 +3888,7 @@ let site_mode () =
ignore (env_bind env "_spec-dir" (String spec_base));
ignore (env_bind env "_lib-dir" (String lib_base));
ignore (env_bind env "_web-dir" (String web_base));
_font_base := static_dir ^ "/fonts";
_import_env := Some env;
let core_files = [
spec_base ^ "/parser.sx"; spec_base ^ "/render.sx"; spec_base ^ "/signals.sx";

View File

@@ -585,7 +585,18 @@ and cek_step_loop state =
(* cek-run *)
and cek_run state =
(let final = (cek_step_loop (state)) in (if sx_truthy ((cek_suspended_p (final))) then (raise (Eval_error (value_to_str (String "IO suspension in non-IO context")))) else (cek_value (final))))
let rec run s =
let final = cek_step_loop s in
if sx_truthy (cek_suspended_p final) then
match !Sx_types._cek_io_resolver with
| Some resolver ->
let request = cek_io_request final in
let result = resolver request final in
run (cek_resume final result)
| None ->
raise (Eval_error (value_to_str (String "IO suspension in non-IO context")))
else cek_value final
in run state
(* cek-resume *)
and cek_resume suspended_state result' =

View File

@@ -246,6 +246,12 @@ exception Parse_error of string
to avoid a dependency cycle between sx_runtime and sx_vm. *)
exception CekPerformRequest of value
(** Hook: resolve IO suspension inline in cek_run.
When set, cek_run calls this instead of raising "IO suspension in non-IO context".
The function receives the suspended state and returns the resolved value.
Used by the HTTP server to handle perform (text-measure) during aser. *)
let _cek_io_resolver : (value -> value -> value) option ref = ref None
(** Hook: convert VM suspension exceptions to CekPerformRequest.
Set by sx_vm after it defines VmSuspended. Called by sx_runtime.sx_apply_cek. *)
let _convert_vm_suspension : (exn -> unit) ref = ref (fun _ -> ())

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,41 @@
;; ~font — web font component
;;
;; Works like ~tw: emits @font-face CSS into the cssx scope,
;; sets font-family on the parent element via spread.
;;
;; Usage:
;; (div (~font :family "Pretext Serif" :src "/static/fonts/DejaVuSerif.ttf")
;; "This text uses DejaVu Serif")
;;
;; (p (~font :family "Pretext Serif") ;; reuse already-declared font
;; "No :src needed after first declaration")
;; Track which font families have already emitted @font-face rules
(define *font-declared* (dict))
(defcomp
~font
(&key family src weight style format)
(let
((fam (or family "serif"))
(wt (or weight "normal"))
(st (or style "normal"))
(fmt (or format "truetype")))
(when
(and src (not (has-key? *font-declared* fam)))
(dict-set! *font-declared* fam true)
(collect!
"cssx"
(str
"@font-face{font-family:'"
fam
"';src:url('"
src
"') format('"
fmt
"');font-weight:"
wt
";font-style:"
st
";}")))
(make-spread {:style (str "font-family:'" fam "',serif;")})))

View File

@@ -1,7 +1,7 @@
;; Pretext demo — DOM-free text layout
;;
;; Visual-first: shows typeset text, then explains how.
;; Uses measure-text (perform) for real glyph measurement.
;; Uses measure-text (perform) for glyph measurement.
;; Compute positioned word data for one line.
(define
@@ -45,15 +45,6 @@
{:y y :words (pretext-position-line lw lwid gap)}))))))
(range n-lines)))))
;; Measure all words and return widths list
(define
pretext-measure-words
(fn
(words font size)
(map
(fn (w) (let ((m (measure-text font size w))) (get m :width)))
words)))
;; Render pre-computed positioned lines
(defcomp
~pretext-demo/render-paragraph
@@ -109,13 +100,23 @@
(font "serif")
(size 15))
(let
((sw (pretext-measure-words sample-words font size))
(space-m (measure-text font size " "))
(n-words (len sample-words)))
((sw (list)) (n-words (len sample-words)))
(for-each
(fn
(w)
(let
((m (measure-text font size w)))
(append! sw (get m :width))))
sample-words)
(let
((space-w (get space-m :width)))
((space-m (measure-text font size " "))
(space-w (get (measure-text font size " ") :width)))
(div
(~tw :tokens "space-y-10")
(begin
(~tw :tokens "space-y-10")
(~font
:family "Pretext Serif"
:src "/static/fonts/DejaVuSerif.ttf"))
(div
(~tw :tokens "space-y-4")
(div
@@ -166,11 +167,6 @@
(div
(~tw :tokens "grid grid-cols-1 md:grid-cols-2 gap-4")
(~pretext-demo/render-paragraph
:words sample-words
:widths sw
:space-width space-w
:max-width nm
:line-height 22
:lines (pretext-layout-lines
sample-words
sw
@@ -178,14 +174,11 @@
space-w
nm
22)
:max-width nm
:line-height 22
:n-words n-words
:label "Greedy (browser default)")
(~pretext-demo/render-paragraph
:words sample-words
:widths sw
:space-width space-w
:max-width nm
:line-height 22
:lines (pretext-layout-lines
sample-words
sw
@@ -193,6 +186,8 @@
space-w
nm
22)
:max-width nm
:line-height 22
:n-words n-words
:label "Knuth-Plass optimal"))))
(div