From 666e29d5f08859788ded2879167c202d25642eb5 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 25 Apr 2026 18:22:10 +0000 Subject: [PATCH] =?UTF-8?q?tcl:=20Phase=201=20tokenizer=20=E2=80=94=20Dode?= =?UTF-8?q?kalogue=20(52=20tests=20green)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/tcl/test.sh | 51 +++++++ lib/tcl/tests/parse.sx | 135 ++++++++++++++++++ lib/tcl/tokenizer.sx | 308 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 494 insertions(+) create mode 100755 lib/tcl/test.sh create mode 100644 lib/tcl/tests/parse.sx create mode 100644 lib/tcl/tokenizer.sx diff --git a/lib/tcl/test.sh b/lib/tcl/test.sh new file mode 100755 index 00000000..a2291ab8 --- /dev/null +++ b/lib/tcl/test.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tcl-on-SX test runner — epoch protocol to sx_server.exe +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then echo "ERROR: sx_server.exe not found"; exit 1; fi + +VERBOSE="${1:-}" +TMPFILE=$(mktemp) +trap "rm -f $TMPFILE" EXIT + +cat > "$TMPFILE" << 'EPOCHS' +(epoch 1) +(load "lib/tcl/tokenizer.sx") +(epoch 2) +(load "lib/tcl/tests/parse.sx") +(epoch 3) +(eval "(tcl-run-parse-tests)") +EPOCHS + +OUTPUT=$(timeout 30 "$SX_SERVER" < "$TMPFILE" 2>&1) +[ "$VERBOSE" = "-v" ] && echo "$OUTPUT" + +# Result follows an (ok-len 3 N) line +RESULT=$(echo "$OUTPUT" | grep -A1 "^(ok-len 3 " | tail -1) +if [ -z "$RESULT" ]; then + RESULT=$(echo "$OUTPUT" | grep "^(ok 3 " | sed 's/^(ok 3 //' | sed 's/)$//') +fi +if [ -z "$RESULT" ]; then + echo "ERROR: no result from epoch 3" + echo "$OUTPUT" | tail -10 + exit 1 +fi + +PASSED=$(echo "$RESULT" | grep -o ':passed [0-9]*' | grep -o '[0-9]*$') +FAILED=$(echo "$RESULT" | grep -o ':failed [0-9]*' | grep -o '[0-9]*$') +PASSED=${PASSED:-0}; FAILED=${FAILED:-1} +TOTAL=$((PASSED + FAILED)) + +if [ "$FAILED" = "0" ]; then + echo "ok $PASSED/$TOTAL tcl-tokenize tests passed" + exit 0 +else + echo "FAIL $PASSED/$TOTAL passed, $FAILED failed" + echo "$RESULT" + exit 1 +fi diff --git a/lib/tcl/tests/parse.sx b/lib/tcl/tests/parse.sx new file mode 100644 index 00000000..efd39c7e --- /dev/null +++ b/lib/tcl/tests/parse.sx @@ -0,0 +1,135 @@ +(define tcl-parse-pass 0) +(define tcl-parse-fail 0) +(define tcl-parse-failures (list)) + +(define tcl-assert + (fn (label expected actual) + (if (= expected actual) + (set! tcl-parse-pass (+ tcl-parse-pass 1)) + (begin + (set! tcl-parse-fail (+ tcl-parse-fail 1)) + (append! tcl-parse-failures + (str label ": expected=" (str expected) " got=" (str actual))))))) + +(define tcl-first-cmd + (fn (src) (nth (tcl-tokenize src) 0))) + +(define tcl-cmd-words + (fn (src) (get (tcl-first-cmd src) :words))) + +(define tcl-word + (fn (src wi) (nth (tcl-cmd-words src) wi))) + +(define tcl-parts + (fn (src wi) (get (tcl-word src wi) :parts))) + +(define tcl-part + (fn (src wi pi) (nth (tcl-parts src wi) pi))) + +(define tcl-run-parse-tests + (fn () + (set! tcl-parse-pass 0) + (set! tcl-parse-fail 0) + (set! tcl-parse-failures (list)) + + ; empty / whitespace-only + (tcl-assert "empty" 0 (len (tcl-tokenize ""))) + (tcl-assert "ws-only" 0 (len (tcl-tokenize " "))) + (tcl-assert "nl-only" 0 (len (tcl-tokenize "\n\n"))) + + ; single command word count + (tcl-assert "1word" 1 (len (tcl-cmd-words "set"))) + (tcl-assert "3words" 3 (len (tcl-cmd-words "set x 1"))) + (tcl-assert "4words" 4 (len (tcl-cmd-words "set a b c"))) + + ; word type — bare word is compound + (tcl-assert "bare-type" "compound" (get (tcl-word "set x 1" 0) :type)) + (tcl-assert "bare-quoted" false (get (tcl-word "set x 1" 0) :quoted)) + (tcl-assert "bare-part-type" "text" (get (tcl-part "set x 1" 0 0) :type)) + (tcl-assert "bare-part-val" "set" (get (tcl-part "set x 1" 0 0) :value)) + (tcl-assert "bare-part2-val" "x" (get (tcl-part "set x 1" 1 0) :value)) + (tcl-assert "bare-part3-val" "1" (get (tcl-part "set x 1" 2 0) :value)) + + ; multiple commands + (tcl-assert "semi-sep" 2 (len (tcl-tokenize "set x 1; set y 2"))) + (tcl-assert "nl-sep" 2 (len (tcl-tokenize "set x 1\nset y 2"))) + (tcl-assert "multi-nl" 3 (len (tcl-tokenize "a\nb\nc"))) + + ; comments + (tcl-assert "comment-only" 0 (len (tcl-tokenize "# comment"))) + (tcl-assert "comment-nl" 0 (len (tcl-tokenize "# comment\n"))) + (tcl-assert "comment-then-cmd" 1 (len (tcl-tokenize "# comment\nset x 1"))) + (tcl-assert "semi-then-comment" 1 (len (tcl-tokenize "set x 1; # comment"))) + + ; brace-quoted words + (tcl-assert "brace-type" "braced" (get (tcl-word "{hello}" 0) :type)) + (tcl-assert "brace-value" "hello" (get (tcl-word "{hello}" 0) :value)) + (tcl-assert "brace-spaces" "hello world" (get (tcl-word "{hello world}" 0) :value)) + (tcl-assert "brace-nested" "a {b} c" (get (tcl-word "{a {b} c}" 0) :value)) + (tcl-assert "brace-no-var-sub" "hello $x" (get (tcl-word "{hello $x}" 0) :value)) + (tcl-assert "brace-no-cmd-sub" "[expr 1]" (get (tcl-word "{[expr 1]}" 0) :value)) + + ; double-quoted words + (tcl-assert "dq-type" "compound" (get (tcl-word "\"hello\"" 0) :type)) + (tcl-assert "dq-quoted" true (get (tcl-word "\"hello\"" 0) :quoted)) + (tcl-assert "dq-literal" "hello" (get (tcl-part "\"hello\"" 0 0) :value)) + + ; variable substitution in bare word + (tcl-assert "var-type" "var" (get (tcl-part "$x" 0 0) :type)) + (tcl-assert "var-name" "x" (get (tcl-part "$x" 0 0) :name)) + (tcl-assert "var-long" "long_name" (get (tcl-part "$long_name" 0 0) :name)) + + ; ${name} form + (tcl-assert "var-brace-type" "var" (get (tcl-part "${x}" 0 0) :type)) + (tcl-assert "var-brace-name" "x" (get (tcl-part "${x}" 0 0) :name)) + + ; array variable substitution + (tcl-assert "arr-type" "var-arr" (get (tcl-part "$arr(key)" 0 0) :type)) + (tcl-assert "arr-name" "arr" (get (tcl-part "$arr(key)" 0 0) :name)) + (tcl-assert "arr-key-len" 1 (len (get (tcl-part "$arr(key)" 0 0) :key))) + (tcl-assert "arr-key-text" "key" + (get (nth (get (tcl-part "$arr(key)" 0 0) :key) 0) :value)) + + ; command substitution + (tcl-assert "cmd-type" "cmd" (get (tcl-part "[expr 1+1]" 0 0) :type)) + (tcl-assert "cmd-src" "expr 1+1" (get (tcl-part "[expr 1+1]" 0 0) :src)) + + ; nested command substitution + (tcl-assert "cmd-nested-src" "expr [string length x]" + (get (tcl-part "[expr [string length x]]" 0 0) :src)) + + ; backslash substitution in double-quoted word + (let ((ps (tcl-parts "\"a\\nb\"" 0))) + (begin + (tcl-assert "bs-n-part0" "a" (get (nth ps 0) :value)) + (tcl-assert "bs-n-part1" "\n" (get (nth ps 1) :value)) + (tcl-assert "bs-n-part2" "b" (get (nth ps 2) :value)))) + + (let ((ps (tcl-parts "\"a\\tb\"" 0))) + (tcl-assert "bs-t-part1" "\t" (get (nth ps 1) :value))) + + (let ((ps (tcl-parts "\"a\\\\b\"" 0))) + (tcl-assert "bs-bs-part1" "\\" (get (nth ps 1) :value))) + + ; mixed word: text + var + text in double-quoted + (let ((ps (tcl-parts "\"hello $name!\"" 0))) + (begin + (tcl-assert "mixed-text0" "hello " (get (nth ps 0) :value)) + (tcl-assert "mixed-var1-type" "var" (get (nth ps 1) :type)) + (tcl-assert "mixed-var1-name" "name" (get (nth ps 1) :name)) + (tcl-assert "mixed-text2" "!" (get (nth ps 2) :value)))) + + ; {*} expansion + (tcl-assert "expand-type" "expand" (get (tcl-word "{*}$list" 0) :type)) + + ; line continuation between words + (tcl-assert "cont-words" 3 (len (tcl-cmd-words "set x \\\n 1"))) + + ; continuation — third command word is correct + (tcl-assert "cont-word2-val" "1" + (get (tcl-part "set x \\\n 1" 2 0) :value)) + + (dict + "passed" tcl-parse-pass + "failed" tcl-parse-fail + "failures" tcl-parse-failures))) diff --git a/lib/tcl/tokenizer.sx b/lib/tcl/tokenizer.sx new file mode 100644 index 00000000..6ad455ac --- /dev/null +++ b/lib/tcl/tokenizer.sx @@ -0,0 +1,308 @@ +(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r")))) + +(define tcl-alpha? + (fn (c) + (and + (not (= c nil)) + (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) + +(define tcl-digit? + (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) + +(define tcl-ident-start? + (fn (c) (or (tcl-alpha? c) (= c "_")))) + +(define tcl-ident-char? + (fn (c) (or (tcl-ident-start? c) (tcl-digit? c)))) + +(define tcl-tokenize + (fn (src) + (let ((pos 0) (src-len (len src)) (commands (list))) + + (define char-at + (fn (off) + (if (< (+ pos off) src-len) (nth src (+ pos off)) nil))) + + (define cur (fn () (char-at 0))) + + (define advance! (fn (n) (set! pos (+ pos n)))) + + (define skip-ws! + (fn () + (when (tcl-ws? (cur)) + (begin (advance! 1) (skip-ws!))))) + + (define skip-to-eol! + (fn () + (when (and (< pos src-len) (not (= (cur) "\n"))) + (begin (advance! 1) (skip-to-eol!))))) + + (define skip-brace-content! + (fn (d) + (when (and (< pos src-len) (> d 0)) + (cond + ((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1)))) + ((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1)))) + (else (begin (advance! 1) (skip-brace-content! d))))))) + + (define skip-dquote-content! + (fn () + (when (and (< pos src-len) (not (= (cur) "\""))) + (begin + (when (= (cur) "\\") (advance! 1)) + (when (< pos src-len) (advance! 1)) + (skip-dquote-content!))))) + + (define parse-bs + (fn () + (advance! 1) + (let ((c (cur))) + (cond + ((= c nil) "\\") + ((= c "n") (begin (advance! 1) "\n")) + ((= c "t") (begin (advance! 1) "\t")) + ((= c "r") (begin (advance! 1) "\r")) + ((= c "\\") (begin (advance! 1) "\\")) + ((= c "[") (begin (advance! 1) "[")) + ((= c "]") (begin (advance! 1) "]")) + ((= c "{") (begin (advance! 1) "{")) + ((= c "}") (begin (advance! 1) "}")) + ((= c "$") (begin (advance! 1) "$")) + ((= c ";") (begin (advance! 1) ";")) + ((= c "\"") (begin (advance! 1) "\"")) + ((= c "'") (begin (advance! 1) "'")) + ((= c " ") (begin (advance! 1) " ")) + ((= c "\n") + (begin + (advance! 1) + (skip-ws!) + " ")) + (else (begin (advance! 1) (str "\\" c))))))) + + (define parse-cmd-sub + (fn () + (advance! 1) + (let ((start pos) (depth 1)) + (define scan! + (fn () + (when (and (< pos src-len) (> depth 0)) + (cond + ((= (cur) "[") + (begin (set! depth (+ depth 1)) (advance! 1) (scan!))) + ((= (cur) "]") + (begin + (set! depth (- depth 1)) + (when (> depth 0) (advance! 1)) + (scan!))) + ((= (cur) "{") + (begin (advance! 1) (skip-brace-content! 1) (scan!))) + ((= (cur) "\"") + (begin + (advance! 1) + (skip-dquote-content!) + (when (= (cur) "\"") (advance! 1)) + (scan!))) + ((= (cur) "\\") + (begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!))) + (else (begin (advance! 1) (scan!))))))) + (scan!) + (let ((src-text (slice src start pos))) + (begin + (when (= (cur) "]") (advance! 1)) + {:type "cmd" :src src-text}))))) + + (define scan-name! + (fn () + (when (and (< pos src-len) (not (= (cur) "}"))) + (begin (advance! 1) (scan-name!))))) + + (define scan-ns-name! + (fn () + (cond + ((tcl-ident-char? (cur)) + (begin (advance! 1) (scan-ns-name!))) + ((and (= (cur) ":") (= (char-at 1) ":")) + (begin (advance! 2) (scan-ns-name!))) + (else nil)))) + + (define scan-klit! + (fn () + (when (and (< pos src-len) + (not (= (cur) ")")) + (not (= (cur) "$")) + (not (= (cur) "[")) + (not (= (cur) "\\"))) + (begin (advance! 1) (scan-klit!))))) + + (define scan-key! + (fn (kp) + (when (and (< pos src-len) (not (= (cur) ")"))) + (cond + ((= (cur) "$") + (begin (append! kp (parse-var-sub)) (scan-key! kp))) + ((= (cur) "[") + (begin (append! kp (parse-cmd-sub)) (scan-key! kp))) + ((= (cur) "\\") + (begin + (append! kp {:type "text" :value (parse-bs)}) + (scan-key! kp))) + (else + (let ((kstart pos)) + (begin + (scan-klit!) + (append! kp {:type "text" :value (slice src kstart pos)}) + (scan-key! kp)))))))) + + (define parse-var-sub + (fn () + (advance! 1) + (cond + ((= (cur) "{") + (begin + (advance! 1) + (let ((start pos)) + (begin + (scan-name!) + (let ((name (slice src start pos))) + (begin + (when (= (cur) "}") (advance! 1)) + {:type "var" :name name})))))) + ((tcl-ident-start? (cur)) + (let ((start pos)) + (begin + (scan-ns-name!) + (let ((name (slice src start pos))) + (if (= (cur) "(") + (begin + (advance! 1) + (let ((key-parts (list))) + (begin + (scan-key! key-parts) + (when (= (cur) ")") (advance! 1)) + {:type "var-arr" :name name :key key-parts}))) + {:type "var" :name name}))))) + (else {:type "text" :value "$"})))) + + (define scan-lit! + (fn (stop?) + (when (and (< pos src-len) + (not (stop? (cur))) + (not (= (cur) "$")) + (not (= (cur) "[")) + (not (= (cur) "\\"))) + (begin (advance! 1) (scan-lit! stop?))))) + + (define parse-word-parts! + (fn (parts stop?) + (when (and (< pos src-len) (not (stop? (cur)))) + (cond + ((= (cur) "$") + (begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?))) + ((= (cur) "[") + (begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?))) + ((= (cur) "\\") + (begin + (append! parts {:type "text" :value (parse-bs)}) + (parse-word-parts! parts stop?))) + (else + (let ((start pos)) + (begin + (scan-lit! stop?) + (when (> pos start) + (append! parts {:type "text" :value (slice src start pos)})) + (parse-word-parts! parts stop?)))))))) + + (define parse-brace-word + (fn () + (advance! 1) + (let ((depth 1) (start pos)) + (define scan! + (fn () + (when (and (< pos src-len) (> depth 0)) + (cond + ((= (cur) "{") + (begin (set! depth (+ depth 1)) (advance! 1) (scan!))) + ((= (cur) "}") + (begin (set! depth (- depth 1)) (when (> depth 0) (advance! 1)) (scan!))) + (else (begin (advance! 1) (scan!))))))) + (scan!) + (let ((value (slice src start pos))) + (begin + (when (= (cur) "}") (advance! 1)) + {:type "braced" :value value}))))) + + (define parse-dquote-word + (fn () + (advance! 1) + (let ((parts (list))) + (begin + (parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil)))) + (when (= (cur) "\"") (advance! 1)) + {:type "compound" :parts parts :quoted true})))) + + (define parse-bare-word + (fn () + (let ((parts (list))) + (begin + (parse-word-parts! + parts + (fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil)))) + {:type "compound" :parts parts :quoted false})))) + + (define parse-word-no-expand + (fn () + (cond + ((= (cur) "{") (parse-brace-word)) + ((= (cur) "\"") (parse-dquote-word)) + (else (parse-bare-word))))) + + (define parse-word + (fn () + (cond + ((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}")) + (begin + (advance! 3) + {:type "expand" :word (parse-word-no-expand)})) + ((= (cur) "{") (parse-brace-word)) + ((= (cur) "\"") (parse-dquote-word)) + (else (parse-bare-word))))) + + (define parse-words! + (fn (words) + (skip-ws!) + (cond + ((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil) + ((and (= (cur) "\\") (= (char-at 1) "\n")) + (begin (advance! 2) (skip-ws!) (parse-words! words))) + (else + (begin + (append! words (parse-word)) + (parse-words! words)))))) + + (define skip-seps! + (fn () + (when (< pos src-len) + (cond + ((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";")) + (begin (advance! 1) (skip-seps!))) + ((and (= (cur) "\\") (= (char-at 1) "\n")) + (begin (advance! 2) (skip-seps!))) + (else nil))))) + + (define parse-all! + (fn () + (skip-seps!) + (when (< pos src-len) + (cond + ((= (cur) "#") + (begin (skip-to-eol!) (parse-all!))) + (else + (let ((words (list))) + (begin + (parse-words! words) + (when (> (len words) 0) + (append! commands {:type "command" :words words})) + (parse-all!)))))))) + + (parse-all!) + commands)))