2 Commits

Author SHA1 Message Date
dbba2fe418 apl: tick Phase 1 tokenizer checkbox + progress log
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 11s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 18:23:06 +00:00
c73b696494 apl: tokenizer + 46 tests (Phase 1, step 1)
Unicode-aware byte scanner using starts-with?/consume! for multi-byte
APL glyphs. Handles numbers (¯-negative), string literals, identifiers
(⎕ system names), all APL function/operator glyphs, :Keywords,
comments ⍝, diamond ⋄, assignment ←.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 18:22:30 +00:00
8 changed files with 257 additions and 595 deletions

83
lib/apl/tests/parse.sx Normal file
View File

@@ -0,0 +1,83 @@
(define apl-test-count 0)
(define apl-test-pass 0)
(define apl-test-fails (list))
(define apl-test
(fn (name actual expected)
(begin
(set! apl-test-count (+ apl-test-count 1))
(if (= actual expected)
(set! apl-test-pass (+ apl-test-pass 1))
(append! apl-test-fails {:name name :actual actual :expected expected})))))
(define tok-types
(fn (src)
(map (fn (t) (get t :type)) (apl-tokenize src))))
(define tok-values
(fn (src)
(map (fn (t) (get t :value)) (apl-tokenize src))))
(define tok-count
(fn (src)
(len (apl-tokenize src))))
(define tok-type-at
(fn (src i)
(get (nth (apl-tokenize src) i) :type)))
(define tok-value-at
(fn (src i)
(get (nth (apl-tokenize src) i) :value)))
(apl-test "empty: no tokens" (tok-count "") 0)
(apl-test "empty: whitespace only" (tok-count " ") 0)
(apl-test "num: zero" (tok-values "0") (list 0))
(apl-test "num: positive" (tok-values "42") (list 42))
(apl-test "num: large" (tok-values "12345") (list 12345))
(apl-test "num: negative" (tok-values "¯5") (list -5))
(apl-test "num: negative zero" (tok-values "¯0") (list 0))
(apl-test "num: strand count" (tok-count "1 2 3") 3)
(apl-test "num: strand types" (tok-types "1 2 3") (list :num :num :num))
(apl-test "num: strand values" (tok-values "1 2 3") (list 1 2 3))
(apl-test "num: neg in strand" (tok-values "1 ¯2 3") (list 1 -2 3))
(apl-test "str: empty" (tok-values "''") (list ""))
(apl-test "str: single char" (tok-values "'a'") (list "a"))
(apl-test "str: word" (tok-values "'hello'") (list "hello"))
(apl-test "str: escaped quote" (tok-values "''''") (list "'"))
(apl-test "str: type" (tok-types "'abc'") (list :str))
(apl-test "name: simple" (tok-values "foo") (list "foo"))
(apl-test "name: type" (tok-types "foo") (list :name))
(apl-test "name: mixed case" (tok-values "MyVar") (list "MyVar"))
(apl-test "name: with digits" (tok-values "x1") (list "x1"))
(apl-test "name: system var" (tok-values "⎕IO") (list "⎕IO"))
(apl-test "name: system var type" (tok-types "⎕IO") (list :name))
(apl-test "glyph: plus" (tok-types "+") (list :glyph))
(apl-test "glyph: plus value" (tok-values "+") (list "+"))
(apl-test "glyph: iota" (tok-values "") (list ""))
(apl-test "glyph: reduce" (tok-values "+/") (list "+" "/"))
(apl-test "glyph: floor" (tok-values "⌊") (list "⌊"))
(apl-test "glyph: rho" (tok-values "") (list ""))
(apl-test "glyph: alpha omega" (tok-types " ⍵") (list :glyph :glyph))
(apl-test "punct: lparen" (tok-types "(") (list :lparen))
(apl-test "punct: rparen" (tok-types ")") (list :rparen))
(apl-test "punct: brackets" (tok-types "[42]") (list :lbracket :num :rbracket))
(apl-test "punct: braces" (tok-types "{}") (list :lbrace :rbrace))
(apl-test "punct: semi" (tok-types ";") (list :semi))
(apl-test "assign: arrow" (tok-types "x←1") (list :name :assign :num))
(apl-test "diamond: separator" (tok-types "1⋄2") (list :num :diamond :num))
(apl-test "newline: emitted" (tok-types "1\n2") (list :num :newline :num))
(apl-test "comment: skipped" (tok-count "⍝ ignore me") 0)
(apl-test "comment: rest ignored" (tok-count "1 ⍝ note") 1)
(apl-test "colon: bare" (tok-types ":") (list :colon))
(apl-test "keyword: If" (tok-values ":If") (list ":If"))
(apl-test "keyword: type" (tok-types ":While") (list :keyword))
(apl-test "keyword: EndFor" (tok-values ":EndFor") (list ":EndFor"))
(apl-test "expr: +/ 5" (tok-types "+/ 5") (list :glyph :glyph :glyph :num))
(apl-test "expr: x←42" (tok-count "x←42") 3)
(apl-test "expr: dfn body" (tok-types "{+⍵}")
(list :lbrace :glyph :glyph :glyph :rbrace))
(define apl-tokenize-test-summary
(str "tokenizer " apl-test-pass "/" apl-test-count
(if (= (len apl-test-fails) 0) "" (str " FAILS: " apl-test-fails))))

168
lib/apl/tokenizer.sx Normal file
View File

@@ -0,0 +1,168 @@
(define apl-glyph-set
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
"∊" "∧" "" "⍱" "⍲" "," "⍪" "" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
"" "∩" "" "⍸" "⌷" "⍋" "⍒" "⊥" "" "⊣" "⊢" "⍎" "⍕"
"" "⍵" "∇" "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
(define apl-glyph?
(fn (ch)
(some (fn (g) (= g ch)) apl-glyph-set)))
(define apl-digit?
(fn (ch)
(and (string? ch) (>= ch "0") (<= ch "9"))))
(define apl-alpha?
(fn (ch)
(and (string? ch)
(or (and (>= ch "a") (<= ch "z"))
(and (>= ch "A") (<= ch "Z"))
(= ch "_")))))
(define apl-tokenize
(fn (source)
(let ((pos 0)
(src-len (len source))
(tokens (list)))
(define tok-push!
(fn (type value)
(append! tokens {:type type :value value})))
(define cur-sw?
(fn (ch)
(and (< pos src-len) (starts-with? (slice source pos) ch))))
(define cur-byte
(fn ()
(if (< pos src-len) (nth source pos) nil)))
(define advance!
(fn ()
(set! pos (+ pos 1))))
(define consume!
(fn (ch)
(set! pos (+ pos (len ch)))))
(define find-glyph
(fn ()
(let ((rem (slice source pos)))
(let ((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
(if (> (len matches) 0) (first matches) nil)))))
(define read-digits!
(fn (acc)
(if (and (< pos src-len) (apl-digit? (cur-byte)))
(let ((ch (cur-byte)))
(begin
(advance!)
(read-digits! (str acc ch))))
acc)))
(define read-ident-cont!
(fn ()
(when (and (< pos src-len)
(let ((ch (cur-byte)))
(or (apl-alpha? ch) (apl-digit? ch))))
(begin
(advance!)
(read-ident-cont!)))))
(define read-string!
(fn (acc)
(cond
((>= pos src-len) acc)
((cur-sw? "'")
(if (and (< (+ pos 1) src-len) (cur-sw? "'"))
(begin
(advance!)
(advance!)
(read-string! (str acc "'")))
(begin (advance!) acc)))
(true
(let ((ch (cur-byte)))
(begin
(advance!)
(read-string! (str acc ch))))))))
(define skip-line!
(fn ()
(when (and (< pos src-len) (not (cur-sw? "\n")))
(begin
(advance!)
(skip-line!)))))
(define scan!
(fn ()
(when (< pos src-len)
(let ((ch (cur-byte)))
(cond
((or (= ch " ") (= ch "\t") (= ch "\r"))
(begin (advance!) (scan!)))
((= ch "\n")
(begin (advance!) (tok-push! :newline nil) (scan!)))
((cur-sw? "⍝")
(begin (skip-line!) (scan!)))
((cur-sw? "⋄")
(begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
((= ch "(")
(begin (advance!) (tok-push! :lparen nil) (scan!)))
((= ch ")")
(begin (advance!) (tok-push! :rparen nil) (scan!)))
((= ch "[")
(begin (advance!) (tok-push! :lbracket nil) (scan!)))
((= ch "]")
(begin (advance!) (tok-push! :rbracket nil) (scan!)))
((= ch "{")
(begin (advance!) (tok-push! :lbrace nil) (scan!)))
((= ch "}")
(begin (advance!) (tok-push! :rbrace nil) (scan!)))
((= ch ";")
(begin (advance!) (tok-push! :semi nil) (scan!)))
((cur-sw? "←")
(begin (consume! "←") (tok-push! :assign nil) (scan!)))
((= ch ":")
(let ((start pos))
(begin
(advance!)
(if (and (< pos src-len) (apl-alpha? (cur-byte)))
(begin
(read-ident-cont!)
(tok-push! :keyword (slice source start pos)))
(tok-push! :colon nil))
(scan!))))
((and (cur-sw? "¯")
(< (+ pos (len "¯")) src-len)
(apl-digit? (nth source (+ pos (len "¯")))))
(begin
(consume! "¯")
(let ((digits (read-digits! "")))
(tok-push! :num (- 0 (parse-int digits 0))))
(scan!)))
((apl-digit? ch)
(begin
(let ((digits (read-digits! "")))
(tok-push! :num (parse-int digits 0)))
(scan!)))
((= ch "'")
(begin
(advance!)
(let ((s (read-string! "")))
(tok-push! :str s))
(scan!)))
((or (apl-alpha? ch) (cur-sw? "⎕"))
(let ((start pos))
(begin
(if (cur-sw? "⎕") (consume! "⎕") (advance!))
(read-ident-cont!)
(tok-push! :name (slice source start pos))
(scan!))))
(true
(let ((g (find-glyph)))
(if g
(begin (consume! g) (tok-push! :glyph g) (scan!))
(begin (advance!) (scan!))))))))))
(scan!)
tokens)))

View File

@@ -1,41 +0,0 @@
; Tcl parser — thin layer over tcl-tokenize
; Adds tcl-parse entry point and word utility fns
; Entry point: parse Tcl source to a list of commands.
; Returns same structure as tcl-tokenize.
(define tcl-parse (fn (src) (tcl-tokenize src)))
; True if word has no substitutions — value can be read statically.
; braced words are always simple. compound words are simple when all
; parts are plain text with no var/cmd parts.
(define tcl-word-simple?
(fn (word)
(cond
((= (get word :type) "braced") true)
((= (get word :type) "compound")
(let ((parts (get word :parts)))
(every? (fn (p) (= (get p :type) "text")) parts)))
(else false))))
; Concatenate text parts of a simple word into a single string.
; For braced words returns :value directly.
; For compound words with only text parts, joins them.
; Returns nil for words with substitutions.
(define tcl-word-literal
(fn (word)
(cond
((= (get word :type) "braced") (get word :value))
((= (get word :type) "compound")
(if (tcl-word-simple? word)
(join "" (map (fn (p) (get p :value)) (get word :parts)))
nil))
(else nil))))
; Number of words in a parsed command.
(define tcl-cmd-len
(fn (cmd) (len (get cmd :words))))
; Nth word literal from a command (index 0 = command name).
; Returns nil if word has substitutions.
(define tcl-nth-literal
(fn (cmd n) (tcl-word-literal (nth (get cmd :words) n))))

View File

@@ -1,53 +0,0 @@
#!/usr/bin/env bash
# Tcl-on-SX test runner — epoch protocol to sx_server.exe
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then echo "ERROR: sx_server.exe not found"; exit 1; fi
VERBOSE="${1:-}"
TMPFILE=$(mktemp)
trap "rm -f $TMPFILE" EXIT
cat > "$TMPFILE" << 'EPOCHS'
(epoch 1)
(load "lib/tcl/tokenizer.sx")
(epoch 2)
(load "lib/tcl/parser.sx")
(epoch 3)
(load "lib/tcl/tests/parse.sx")
(epoch 4)
(eval "(tcl-run-parse-tests)")
EPOCHS
OUTPUT=$(timeout 30 "$SX_SERVER" < "$TMPFILE" 2>&1)
[ "$VERBOSE" = "-v" ] && echo "$OUTPUT"
# Result follows an (ok-len 3 N) line
RESULT=$(echo "$OUTPUT" | grep -A1 "^(ok-len 4 " | tail -1)
if [ -z "$RESULT" ]; then
RESULT=$(echo "$OUTPUT" | grep "^(ok 4 " | sed 's/^(ok 3 //' | sed 's/)$//')
fi
if [ -z "$RESULT" ]; then
echo "ERROR: no result from epoch 4"
echo "$OUTPUT" | tail -10
exit 1
fi
PASSED=$(echo "$RESULT" | grep -o ':passed [0-9]*' | grep -o '[0-9]*$')
FAILED=$(echo "$RESULT" | grep -o ':failed [0-9]*' | grep -o '[0-9]*$')
PASSED=${PASSED:-0}; FAILED=${FAILED:-1}
TOTAL=$((PASSED + FAILED))
if [ "$FAILED" = "0" ]; then
echo "ok $PASSED/$TOTAL tcl-tokenize tests passed"
exit 0
else
echo "FAIL $PASSED/$TOTAL passed, $FAILED failed"
echo "$RESULT"
exit 1
fi

View File

@@ -1,186 +0,0 @@
(define tcl-parse-pass 0)
(define tcl-parse-fail 0)
(define tcl-parse-failures (list))
(define tcl-assert
(fn (label expected actual)
(if (= expected actual)
(set! tcl-parse-pass (+ tcl-parse-pass 1))
(begin
(set! tcl-parse-fail (+ tcl-parse-fail 1))
(append! tcl-parse-failures
(str label ": expected=" (str expected) " got=" (str actual)))))))
(define tcl-first-cmd
(fn (src) (nth (tcl-tokenize src) 0)))
(define tcl-cmd-words
(fn (src) (get (tcl-first-cmd src) :words)))
(define tcl-word
(fn (src wi) (nth (tcl-cmd-words src) wi)))
(define tcl-parts
(fn (src wi) (get (tcl-word src wi) :parts)))
(define tcl-part
(fn (src wi pi) (nth (tcl-parts src wi) pi)))
(define tcl-run-parse-tests
(fn ()
(set! tcl-parse-pass 0)
(set! tcl-parse-fail 0)
(set! tcl-parse-failures (list))
; empty / whitespace-only
(tcl-assert "empty" 0 (len (tcl-tokenize "")))
(tcl-assert "ws-only" 0 (len (tcl-tokenize " ")))
(tcl-assert "nl-only" 0 (len (tcl-tokenize "\n\n")))
; single command word count
(tcl-assert "1word" 1 (len (tcl-cmd-words "set")))
(tcl-assert "3words" 3 (len (tcl-cmd-words "set x 1")))
(tcl-assert "4words" 4 (len (tcl-cmd-words "set a b c")))
; word type — bare word is compound
(tcl-assert "bare-type" "compound" (get (tcl-word "set x 1" 0) :type))
(tcl-assert "bare-quoted" false (get (tcl-word "set x 1" 0) :quoted))
(tcl-assert "bare-part-type" "text" (get (tcl-part "set x 1" 0 0) :type))
(tcl-assert "bare-part-val" "set" (get (tcl-part "set x 1" 0 0) :value))
(tcl-assert "bare-part2-val" "x" (get (tcl-part "set x 1" 1 0) :value))
(tcl-assert "bare-part3-val" "1" (get (tcl-part "set x 1" 2 0) :value))
; multiple commands
(tcl-assert "semi-sep" 2 (len (tcl-tokenize "set x 1; set y 2")))
(tcl-assert "nl-sep" 2 (len (tcl-tokenize "set x 1\nset y 2")))
(tcl-assert "multi-nl" 3 (len (tcl-tokenize "a\nb\nc")))
; comments
(tcl-assert "comment-only" 0 (len (tcl-tokenize "# comment")))
(tcl-assert "comment-nl" 0 (len (tcl-tokenize "# comment\n")))
(tcl-assert "comment-then-cmd" 1 (len (tcl-tokenize "# comment\nset x 1")))
(tcl-assert "semi-then-comment" 1 (len (tcl-tokenize "set x 1; # comment")))
; brace-quoted words
(tcl-assert "brace-type" "braced" (get (tcl-word "{hello}" 0) :type))
(tcl-assert "brace-value" "hello" (get (tcl-word "{hello}" 0) :value))
(tcl-assert "brace-spaces" "hello world" (get (tcl-word "{hello world}" 0) :value))
(tcl-assert "brace-nested" "a {b} c" (get (tcl-word "{a {b} c}" 0) :value))
(tcl-assert "brace-no-var-sub" "hello $x" (get (tcl-word "{hello $x}" 0) :value))
(tcl-assert "brace-no-cmd-sub" "[expr 1]" (get (tcl-word "{[expr 1]}" 0) :value))
; double-quoted words
(tcl-assert "dq-type" "compound" (get (tcl-word "\"hello\"" 0) :type))
(tcl-assert "dq-quoted" true (get (tcl-word "\"hello\"" 0) :quoted))
(tcl-assert "dq-literal" "hello" (get (tcl-part "\"hello\"" 0 0) :value))
; variable substitution in bare word
(tcl-assert "var-type" "var" (get (tcl-part "$x" 0 0) :type))
(tcl-assert "var-name" "x" (get (tcl-part "$x" 0 0) :name))
(tcl-assert "var-long" "long_name" (get (tcl-part "$long_name" 0 0) :name))
; ${name} form
(tcl-assert "var-brace-type" "var" (get (tcl-part "${x}" 0 0) :type))
(tcl-assert "var-brace-name" "x" (get (tcl-part "${x}" 0 0) :name))
; array variable substitution
(tcl-assert "arr-type" "var-arr" (get (tcl-part "$arr(key)" 0 0) :type))
(tcl-assert "arr-name" "arr" (get (tcl-part "$arr(key)" 0 0) :name))
(tcl-assert "arr-key-len" 1 (len (get (tcl-part "$arr(key)" 0 0) :key)))
(tcl-assert "arr-key-text" "key"
(get (nth (get (tcl-part "$arr(key)" 0 0) :key) 0) :value))
; command substitution
(tcl-assert "cmd-type" "cmd" (get (tcl-part "[expr 1+1]" 0 0) :type))
(tcl-assert "cmd-src" "expr 1+1" (get (tcl-part "[expr 1+1]" 0 0) :src))
; nested command substitution
(tcl-assert "cmd-nested-src" "expr [string length x]"
(get (tcl-part "[expr [string length x]]" 0 0) :src))
; backslash substitution in double-quoted word
(let ((ps (tcl-parts "\"a\\nb\"" 0)))
(begin
(tcl-assert "bs-n-part0" "a" (get (nth ps 0) :value))
(tcl-assert "bs-n-part1" "\n" (get (nth ps 1) :value))
(tcl-assert "bs-n-part2" "b" (get (nth ps 2) :value))))
(let ((ps (tcl-parts "\"a\\tb\"" 0)))
(tcl-assert "bs-t-part1" "\t" (get (nth ps 1) :value)))
(let ((ps (tcl-parts "\"a\\\\b\"" 0)))
(tcl-assert "bs-bs-part1" "\\" (get (nth ps 1) :value)))
; mixed word: text + var + text in double-quoted
(let ((ps (tcl-parts "\"hello $name!\"" 0)))
(begin
(tcl-assert "mixed-text0" "hello " (get (nth ps 0) :value))
(tcl-assert "mixed-var1-type" "var" (get (nth ps 1) :type))
(tcl-assert "mixed-var1-name" "name" (get (nth ps 1) :name))
(tcl-assert "mixed-text2" "!" (get (nth ps 2) :value))))
; {*} expansion
(tcl-assert "expand-type" "expand" (get (tcl-word "{*}$list" 0) :type))
; line continuation between words
(tcl-assert "cont-words" 3 (len (tcl-cmd-words "set x \\\n 1")))
; continuation — third command word is correct
(tcl-assert "cont-word2-val" "1"
(get (tcl-part "set x \\\n 1" 2 0) :value))
; --- parser helpers ---
; tcl-parse is an alias for tcl-tokenize
(tcl-assert "parse-cmd-count" 1 (len (tcl-parse "set x 1")))
(tcl-assert "parse-2cmds" 2 (len (tcl-parse "set x 1; set y 2")))
; tcl-cmd-len
(tcl-assert "cmd-len-3" 3 (tcl-cmd-len (nth (tcl-parse "set x 1") 0)))
(tcl-assert "cmd-len-1" 1 (tcl-cmd-len (nth (tcl-parse "puts") 0)))
; tcl-word-simple? on braced word
(tcl-assert "simple-braced" true
(tcl-word-simple? (nth (get (nth (tcl-parse "{hello}") 0) :words) 0)))
; tcl-word-simple? on bare word with no subs
(tcl-assert "simple-bare" true
(tcl-word-simple? (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
; tcl-word-simple? on word containing a var sub — false
(tcl-assert "simple-var-false" false
(tcl-word-simple? (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
; tcl-word-simple? on word containing a cmd sub — false
(tcl-assert "simple-cmd-false" false
(tcl-word-simple? (nth (get (nth (tcl-parse "[expr 1]") 0) :words) 0)))
; tcl-word-literal on braced word
(tcl-assert "lit-braced" "hello world"
(tcl-word-literal (nth (get (nth (tcl-parse "{hello world}") 0) :words) 0)))
; tcl-word-literal on bare word
(tcl-assert "lit-bare" "hello"
(tcl-word-literal (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
; tcl-word-literal on word with var sub returns nil
(tcl-assert "lit-var-nil" nil
(tcl-word-literal (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
; tcl-nth-literal
(tcl-assert "nth-lit-0" "set"
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 0))
(tcl-assert "nth-lit-1" "x"
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 1))
(tcl-assert "nth-lit-2" "1"
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 2))
; tcl-nth-literal returns nil when word has subs
(tcl-assert "nth-lit-nil" nil
(tcl-nth-literal (nth (tcl-parse "set x $y") 0) 2))
(dict
"passed" tcl-parse-pass
"failed" tcl-parse-fail
"failures" tcl-parse-failures)))

View File

@@ -1,308 +0,0 @@
(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r"))))
(define tcl-alpha?
(fn (c)
(and
(not (= c nil))
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
(define tcl-digit?
(fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
(define tcl-ident-start?
(fn (c) (or (tcl-alpha? c) (= c "_"))))
(define tcl-ident-char?
(fn (c) (or (tcl-ident-start? c) (tcl-digit? c))))
(define tcl-tokenize
(fn (src)
(let ((pos 0) (src-len (len src)) (commands (list)))
(define char-at
(fn (off)
(if (< (+ pos off) src-len) (nth src (+ pos off)) nil)))
(define cur (fn () (char-at 0)))
(define advance! (fn (n) (set! pos (+ pos n))))
(define skip-ws!
(fn ()
(when (tcl-ws? (cur))
(begin (advance! 1) (skip-ws!)))))
(define skip-to-eol!
(fn ()
(when (and (< pos src-len) (not (= (cur) "\n")))
(begin (advance! 1) (skip-to-eol!)))))
(define skip-brace-content!
(fn (d)
(when (and (< pos src-len) (> d 0))
(cond
((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1))))
((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1))))
(else (begin (advance! 1) (skip-brace-content! d)))))))
(define skip-dquote-content!
(fn ()
(when (and (< pos src-len) (not (= (cur) "\"")))
(begin
(when (= (cur) "\\") (advance! 1))
(when (< pos src-len) (advance! 1))
(skip-dquote-content!)))))
(define parse-bs
(fn ()
(advance! 1)
(let ((c (cur)))
(cond
((= c nil) "\\")
((= c "n") (begin (advance! 1) "\n"))
((= c "t") (begin (advance! 1) "\t"))
((= c "r") (begin (advance! 1) "\r"))
((= c "\\") (begin (advance! 1) "\\"))
((= c "[") (begin (advance! 1) "["))
((= c "]") (begin (advance! 1) "]"))
((= c "{") (begin (advance! 1) "{"))
((= c "}") (begin (advance! 1) "}"))
((= c "$") (begin (advance! 1) "$"))
((= c ";") (begin (advance! 1) ";"))
((= c "\"") (begin (advance! 1) "\""))
((= c "'") (begin (advance! 1) "'"))
((= c " ") (begin (advance! 1) " "))
((= c "\n")
(begin
(advance! 1)
(skip-ws!)
" "))
(else (begin (advance! 1) (str "\\" c)))))))
(define parse-cmd-sub
(fn ()
(advance! 1)
(let ((start pos) (depth 1))
(define scan!
(fn ()
(when (and (< pos src-len) (> depth 0))
(cond
((= (cur) "[")
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
((= (cur) "]")
(begin
(set! depth (- depth 1))
(when (> depth 0) (advance! 1))
(scan!)))
((= (cur) "{")
(begin (advance! 1) (skip-brace-content! 1) (scan!)))
((= (cur) "\"")
(begin
(advance! 1)
(skip-dquote-content!)
(when (= (cur) "\"") (advance! 1))
(scan!)))
((= (cur) "\\")
(begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!)))
(else (begin (advance! 1) (scan!)))))))
(scan!)
(let ((src-text (slice src start pos)))
(begin
(when (= (cur) "]") (advance! 1))
{:type "cmd" :src src-text})))))
(define scan-name!
(fn ()
(when (and (< pos src-len) (not (= (cur) "}")))
(begin (advance! 1) (scan-name!)))))
(define scan-ns-name!
(fn ()
(cond
((tcl-ident-char? (cur))
(begin (advance! 1) (scan-ns-name!)))
((and (= (cur) ":") (= (char-at 1) ":"))
(begin (advance! 2) (scan-ns-name!)))
(else nil))))
(define scan-klit!
(fn ()
(when (and (< pos src-len)
(not (= (cur) ")"))
(not (= (cur) "$"))
(not (= (cur) "["))
(not (= (cur) "\\")))
(begin (advance! 1) (scan-klit!)))))
(define scan-key!
(fn (kp)
(when (and (< pos src-len) (not (= (cur) ")")))
(cond
((= (cur) "$")
(begin (append! kp (parse-var-sub)) (scan-key! kp)))
((= (cur) "[")
(begin (append! kp (parse-cmd-sub)) (scan-key! kp)))
((= (cur) "\\")
(begin
(append! kp {:type "text" :value (parse-bs)})
(scan-key! kp)))
(else
(let ((kstart pos))
(begin
(scan-klit!)
(append! kp {:type "text" :value (slice src kstart pos)})
(scan-key! kp))))))))
(define parse-var-sub
(fn ()
(advance! 1)
(cond
((= (cur) "{")
(begin
(advance! 1)
(let ((start pos))
(begin
(scan-name!)
(let ((name (slice src start pos)))
(begin
(when (= (cur) "}") (advance! 1))
{:type "var" :name name}))))))
((tcl-ident-start? (cur))
(let ((start pos))
(begin
(scan-ns-name!)
(let ((name (slice src start pos)))
(if (= (cur) "(")
(begin
(advance! 1)
(let ((key-parts (list)))
(begin
(scan-key! key-parts)
(when (= (cur) ")") (advance! 1))
{:type "var-arr" :name name :key key-parts})))
{:type "var" :name name})))))
(else {:type "text" :value "$"}))))
(define scan-lit!
(fn (stop?)
(when (and (< pos src-len)
(not (stop? (cur)))
(not (= (cur) "$"))
(not (= (cur) "["))
(not (= (cur) "\\")))
(begin (advance! 1) (scan-lit! stop?)))))
(define parse-word-parts!
(fn (parts stop?)
(when (and (< pos src-len) (not (stop? (cur))))
(cond
((= (cur) "$")
(begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?)))
((= (cur) "[")
(begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?)))
((= (cur) "\\")
(begin
(append! parts {:type "text" :value (parse-bs)})
(parse-word-parts! parts stop?)))
(else
(let ((start pos))
(begin
(scan-lit! stop?)
(when (> pos start)
(append! parts {:type "text" :value (slice src start pos)}))
(parse-word-parts! parts stop?))))))))
(define parse-brace-word
(fn ()
(advance! 1)
(let ((depth 1) (start pos))
(define scan!
(fn ()
(when (and (< pos src-len) (> depth 0))
(cond
((= (cur) "{")
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
((= (cur) "}")
(begin (set! depth (- depth 1)) (when (> depth 0) (advance! 1)) (scan!)))
(else (begin (advance! 1) (scan!)))))))
(scan!)
(let ((value (slice src start pos)))
(begin
(when (= (cur) "}") (advance! 1))
{:type "braced" :value value})))))
(define parse-dquote-word
(fn ()
(advance! 1)
(let ((parts (list)))
(begin
(parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil))))
(when (= (cur) "\"") (advance! 1))
{:type "compound" :parts parts :quoted true}))))
(define parse-bare-word
(fn ()
(let ((parts (list)))
(begin
(parse-word-parts!
parts
(fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil))))
{:type "compound" :parts parts :quoted false}))))
(define parse-word-no-expand
(fn ()
(cond
((= (cur) "{") (parse-brace-word))
((= (cur) "\"") (parse-dquote-word))
(else (parse-bare-word)))))
(define parse-word
(fn ()
(cond
((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}"))
(begin
(advance! 3)
{:type "expand" :word (parse-word-no-expand)}))
((= (cur) "{") (parse-brace-word))
((= (cur) "\"") (parse-dquote-word))
(else (parse-bare-word)))))
(define parse-words!
(fn (words)
(skip-ws!)
(cond
((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil)
((and (= (cur) "\\") (= (char-at 1) "\n"))
(begin (advance! 2) (skip-ws!) (parse-words! words)))
(else
(begin
(append! words (parse-word))
(parse-words! words))))))
(define skip-seps!
(fn ()
(when (< pos src-len)
(cond
((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";"))
(begin (advance! 1) (skip-seps!)))
((and (= (cur) "\\") (= (char-at 1) "\n"))
(begin (advance! 2) (skip-seps!)))
(else nil)))))
(define parse-all!
(fn ()
(skip-seps!)
(when (< pos src-len)
(cond
((= (cur) "#")
(begin (skip-to-eol!) (parse-all!)))
(else
(let ((words (list)))
(begin
(parse-words! words)
(when (> (len words) 0)
(append! commands {:type "command" :words words}))
(parse-all!))))))))
(parse-all!)
commands)))

View File

@@ -48,7 +48,7 @@ Core mapping:
## Roadmap
### Phase 1 — tokenizer + parser
- [ ] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ⍱ ⍲ , ⍪ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ⍸ ⌷ ⍋ ⍒ ⊥ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
- [x] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ⍱ ⍲ , ⍪ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ⍸ ⌷ ⍋ ⍒ ⊥ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
- [ ] Parser: right-to-left; classify each token as function, operator, value, or name; resolve valence positionally; dfn `{…}` body, tradfn `∇` header, guards `:`, control words `:If :While :For …` (Dyalog-style)
- [ ] Unit tests in `lib/apl/tests/parse.sx`
@@ -108,7 +108,7 @@ Core mapping:
_Newest first._
- _(none yet)_
- 2026-04-25: tokenizer (Phase 1 step 1) — 46/46 tests green; Unicode-aware starts-with? scanner for multi-byte APL glyphs; `lib/apl/tokenizer.sx` + `lib/apl/tests/parse.sx`
## Blockers

View File

@@ -50,7 +50,7 @@ Core mapping:
## Roadmap
### Phase 1 — tokenizer + parser (the Dodekalogue)
- [x] Tokenizer applying the 12 rules:
- [ ] Tokenizer applying the 12 rules:
1. Commands separated by `;` or newlines
2. Words separated by whitespace within a command
3. Double-quoted words: `\` escapes + `[…]` + `${…}` + `$var` substitution
@@ -63,8 +63,8 @@ Core mapping:
10. Order of substitution is left-to-right, single-pass
11. Substitutions don't recurse — substituted text is not re-parsed
12. The result of any substitution is the value, not a new script
- [x] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
- [x] Unit tests in `lib/tcl/tests/parse.sx`
- [ ] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
- [ ] Unit tests in `lib/tcl/tests/parse.sx`
### Phase 2 — sequential eval + core commands
- [ ] `tcl-eval-script`: walk command list, dispatch each first-word into command table
@@ -120,8 +120,7 @@ Core mapping:
_Newest first._
- 2026-04-25: Phase 1 parser — `lib/tcl/parser.sx`, word-simple?/word-literal helpers, 67 tests green, commit 6ee05259
- 2026-04-25: Phase 1 tokenizer (Dodekalogue) — `lib/tcl/tokenizer.sx`, 52 tests green, commit 666e29d5
- _(none yet)_
## Blockers