Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dbba2fe418 | |||
| c73b696494 |
83
lib/apl/tests/parse.sx
Normal file
83
lib/apl/tests/parse.sx
Normal file
@@ -0,0 +1,83 @@
|
||||
(define apl-test-count 0)
|
||||
(define apl-test-pass 0)
|
||||
(define apl-test-fails (list))
|
||||
|
||||
; Record the outcome of one test case in the module-level counters.
;   name     - label stored with a failure record
;   actual   - value produced by the code under test
;   expected - value it is compared against with =
; Always bumps apl-test-count; bumps apl-test-pass on a match, otherwise
; appends a {:name :actual :expected} record to apl-test-fails.
(define apl-test
  (fn (name actual expected)
    (let ((matched (= actual expected)))
      (begin
        (set! apl-test-count (+ 1 apl-test-count))
        (if (not matched)
          (append! apl-test-fails {:name name :actual actual :expected expected})
          (set! apl-test-pass (+ 1 apl-test-pass)))))))
|
||||
|
||||
; Tokenize `src` and return the :type of every token, in order.
(define tok-types
  (fn (src)
    (let ((toks (apl-tokenize src)))
      (map (fn (tok) (get tok :type)) toks))))
|
||||
|
||||
; Tokenize `src` and return the :value of every token, in order.
(define tok-values
  (fn (src)
    (let ((toks (apl-tokenize src)))
      (map (fn (tok) (get tok :value)) toks))))
|
||||
|
||||
; Number of tokens that apl-tokenize produces for `src`.
(define tok-count
  (fn (src)
    (let ((toks (apl-tokenize src)))
      (len toks))))
|
||||
|
||||
; :type of the token at index `i` in the tokenization of `src`.
(define tok-type-at
  (fn (src i)
    (let ((tok (nth (apl-tokenize src) i)))
      (get tok :type))))
|
||||
|
||||
; :value of the token at index `i` in the tokenization of `src`.
(define tok-value-at
  (fn (src i)
    (let ((tok (nth (apl-tokenize src) i)))
      (get tok :value))))
|
||||
|
||||
(apl-test "empty: no tokens" (tok-count "") 0)
|
||||
(apl-test "empty: whitespace only" (tok-count " ") 0)
|
||||
(apl-test "num: zero" (tok-values "0") (list 0))
|
||||
(apl-test "num: positive" (tok-values "42") (list 42))
|
||||
(apl-test "num: large" (tok-values "12345") (list 12345))
|
||||
(apl-test "num: negative" (tok-values "¯5") (list -5))
|
||||
(apl-test "num: negative zero" (tok-values "¯0") (list 0))
|
||||
(apl-test "num: strand count" (tok-count "1 2 3") 3)
|
||||
(apl-test "num: strand types" (tok-types "1 2 3") (list :num :num :num))
|
||||
(apl-test "num: strand values" (tok-values "1 2 3") (list 1 2 3))
|
||||
(apl-test "num: neg in strand" (tok-values "1 ¯2 3") (list 1 -2 3))
|
||||
(apl-test "str: empty" (tok-values "''") (list ""))
|
||||
(apl-test "str: single char" (tok-values "'a'") (list "a"))
|
||||
(apl-test "str: word" (tok-values "'hello'") (list "hello"))
|
||||
(apl-test "str: escaped quote" (tok-values "''''") (list "'"))
|
||||
(apl-test "str: type" (tok-types "'abc'") (list :str))
|
||||
(apl-test "name: simple" (tok-values "foo") (list "foo"))
|
||||
(apl-test "name: type" (tok-types "foo") (list :name))
|
||||
(apl-test "name: mixed case" (tok-values "MyVar") (list "MyVar"))
|
||||
(apl-test "name: with digits" (tok-values "x1") (list "x1"))
|
||||
(apl-test "name: system var" (tok-values "⎕IO") (list "⎕IO"))
|
||||
(apl-test "name: system var type" (tok-types "⎕IO") (list :name))
|
||||
(apl-test "glyph: plus" (tok-types "+") (list :glyph))
|
||||
(apl-test "glyph: plus value" (tok-values "+") (list "+"))
|
||||
(apl-test "glyph: iota" (tok-values "⍳") (list "⍳"))
|
||||
(apl-test "glyph: reduce" (tok-values "+/") (list "+" "/"))
|
||||
(apl-test "glyph: floor" (tok-values "⌊") (list "⌊"))
|
||||
(apl-test "glyph: rho" (tok-values "⍴") (list "⍴"))
|
||||
(apl-test "glyph: alpha omega" (tok-types "⍺ ⍵") (list :glyph :glyph))
|
||||
(apl-test "punct: lparen" (tok-types "(") (list :lparen))
|
||||
(apl-test "punct: rparen" (tok-types ")") (list :rparen))
|
||||
(apl-test "punct: brackets" (tok-types "[42]") (list :lbracket :num :rbracket))
|
||||
(apl-test "punct: braces" (tok-types "{}") (list :lbrace :rbrace))
|
||||
(apl-test "punct: semi" (tok-types ";") (list :semi))
|
||||
(apl-test "assign: arrow" (tok-types "x←1") (list :name :assign :num))
|
||||
(apl-test "diamond: separator" (tok-types "1⋄2") (list :num :diamond :num))
|
||||
(apl-test "newline: emitted" (tok-types "1\n2") (list :num :newline :num))
|
||||
(apl-test "comment: skipped" (tok-count "⍝ ignore me") 0)
|
||||
(apl-test "comment: rest ignored" (tok-count "1 ⍝ note") 1)
|
||||
(apl-test "colon: bare" (tok-types ":") (list :colon))
|
||||
(apl-test "keyword: If" (tok-values ":If") (list ":If"))
|
||||
(apl-test "keyword: type" (tok-types ":While") (list :keyword))
|
||||
(apl-test "keyword: EndFor" (tok-values ":EndFor") (list ":EndFor"))
|
||||
(apl-test "expr: +/ ⍳ 5" (tok-types "+/ ⍳ 5") (list :glyph :glyph :glyph :num))
|
||||
(apl-test "expr: x←42" (tok-count "x←42") 3)
|
||||
(apl-test "expr: dfn body" (tok-types "{⍺+⍵}")
|
||||
(list :lbrace :glyph :glyph :glyph :rbrace))
|
||||
|
||||
; One-line report string, computed once when this definition is evaluated:
; "tokenizer <passed>/<total>", followed by " FAILS: <records>" when
; apl-test-fails is non-empty.
(define apl-tokenize-test-summary
  (let ((fail-note (if (> (len apl-test-fails) 0)
                     (str " FAILS: " apl-test-fails)
                     "")))
    (str "tokenizer " apl-test-pass "/" apl-test-count fail-note)))
|
||||
168
lib/apl/tokenizer.sx
Normal file
168
lib/apl/tokenizer.sx
Normal file
@@ -0,0 +1,168 @@
|
||||
(define apl-glyph-set
|
||||
(list "+" "-" "×" "÷" "*" "⍟" "⌈" "⌊" "|" "!" "?" "○" "~" "<" "≤" "=" "≥" ">" "≠"
|
||||
"∊" "∧" "∨" "⍱" "⍲" "," "⍪" "⍴" "⌽" "⊖" "⍉" "↑" "↓" "⊂" "⊃" "⊆"
|
||||
"∪" "∩" "⍳" "⍸" "⌷" "⍋" "⍒" "⊥" "⊤" "⊣" "⊢" "⍎" "⍕"
|
||||
"⍺" "⍵" "∇" "/" "\\" "¨" "⍨" "∘" "." "⍣" "⍤" "⍥" "@" "¯"))
|
||||
|
||||
; Membership test: does `ch` equal one of the entries of apl-glyph-set?
(define apl-glyph?
  (fn (ch)
    (some (fn (glyph) (= glyph ch))
          apl-glyph-set)))
|
||||
|
||||
; True when `ch` is a string that compares between "0" and "9" inclusive.
; Non-string input (for example nil at end of input) yields false.
(define apl-digit?
  (fn (ch)
    (if (string? ch)
      (and (<= "0" ch) (>= "9" ch))
      false)))
|
||||
|
||||
; True when `ch` is a string in the range a-z or A-Z, or is "_".
; Non-string input (for example nil at end of input) yields false.
(define apl-alpha?
  (fn (ch)
    (if (string? ch)
      (cond
        ((and (<= "a" ch) (>= "z" ch)) true)
        ((and (<= "A" ch) (>= "Z" ch)) true)
        ((= ch "_") true)
        (true false))
      false)))
|
||||
|
||||
; Tokenize APL source text into a list of {:type … :value …} dicts.
;
; Token types emitted:
;   :num      integer literal; a ¯ immediately before a digit negates it
;   :str      'single-quoted' text; a doubled '' inside is one literal quote
;   :name     identifier (letters, digits, _), optionally starting with ⎕
;   :keyword  ":" directly followed by an identifier, e.g. ":If"
;             (the value includes the colon)
;   :glyph    an entry of apl-glyph-set (the value is the glyph string)
;   :lparen :rparen :lbracket :rbracket :lbrace :rbrace :semi :colon
;   :assign (←)  :diamond (⋄)  :newline   — all carry a nil value
;
; Spaces, tabs and carriage returns are skipped, a ⍝ comment runs to the end
; of its line, and a character that matches none of the rules is dropped.
(define apl-tokenize
  (fn (source)
    (let ((pos 0)
          (src-len (len source))
          (tokens (list)))

      ; Append one token dict to the output list.
      (define tok-push!
        (fn (type value)
          (append! tokens {:type type :value value})))

      ; True when the unread input starts with the string `ch`.
      (define cur-sw?
        (fn (ch)
          (and (< pos src-len) (starts-with? (slice source pos) ch))))

      ; Element of `source` at the cursor, or nil at end of input.
      (define cur-byte
        (fn ()
          (if (< pos src-len) (nth source pos) nil)))

      ; Move the cursor forward by one position.
      (define advance!
        (fn ()
          (set! pos (+ pos 1))))

      ; Move the cursor forward by the length of `ch`.
      (define consume!
        (fn (ch)
          (set! pos (+ pos (len ch)))))

      ; First entry of apl-glyph-set that prefixes the unread input, or nil.
      (define find-glyph
        (fn ()
          (let ((rem (slice source pos)))
            (let ((matches (filter (fn (g) (starts-with? rem g)) apl-glyph-set)))
              (if (> (len matches) 0) (first matches) nil)))))

      ; Consume a run of digits, appending each to `acc`; returns the string.
      (define read-digits!
        (fn (acc)
          (if (and (< pos src-len) (apl-digit? (cur-byte)))
            (let ((ch (cur-byte)))
              (begin
                (advance!)
                (read-digits! (str acc ch))))
            acc)))

      ; Consume the remaining letters/digits/underscores of an identifier.
      (define read-ident-cont!
        (fn ()
          (when (and (< pos src-len)
                     (let ((ch (cur-byte)))
                       (or (apl-alpha? ch) (apl-digit? ch))))
            (begin
              (advance!)
              (read-ident-cont!)))))

      ; Read a string body; the opening quote has already been consumed.
      ; A quote directly followed by another quote is one escaped quote;
      ; any other quote closes the string. Unterminated input returns the
      ; text read so far.
      (define read-string!
        (fn (acc)
          (cond
            ((>= pos src-len) acc)
            ((cur-sw? "'")
              ; Inspect the character AFTER this quote. Re-testing the
              ; current position would treat every quote that is not the
              ; last character of the input as an escape, merging
              ; adjacent strings such as 'a' 'b'.
              (if (and (< (+ pos 1) src-len)
                       (= (nth source (+ pos 1)) "'"))
                (begin
                  (advance!)
                  (advance!)
                  (read-string! (str acc "'")))
                (begin (advance!) acc)))
            (true
              (let ((ch (cur-byte)))
                (begin
                  (advance!)
                  (read-string! (str acc ch))))))))

      ; Skip to (but not past) the next newline or end of input.
      (define skip-line!
        (fn ()
          (when (and (< pos src-len) (not (cur-sw? "\n")))
            (begin
              (advance!)
              (skip-line!)))))

      ; Main loop: classify the input at the cursor, emit at most one
      ; token, then recurse until the input is exhausted. Clause order
      ; matters: ¯ followed by a digit is a negative number, otherwise it
      ; falls through to the glyph lookup.
      (define scan!
        (fn ()
          (when (< pos src-len)
            (let ((ch (cur-byte)))
              (cond
                ((or (= ch " ") (= ch "\t") (= ch "\r"))
                  (begin (advance!) (scan!)))
                ((= ch "\n")
                  (begin (advance!) (tok-push! :newline nil) (scan!)))
                ((cur-sw? "⍝")
                  (begin (skip-line!) (scan!)))
                ((cur-sw? "⋄")
                  (begin (consume! "⋄") (tok-push! :diamond nil) (scan!)))
                ((= ch "(")
                  (begin (advance!) (tok-push! :lparen nil) (scan!)))
                ((= ch ")")
                  (begin (advance!) (tok-push! :rparen nil) (scan!)))
                ((= ch "[")
                  (begin (advance!) (tok-push! :lbracket nil) (scan!)))
                ((= ch "]")
                  (begin (advance!) (tok-push! :rbracket nil) (scan!)))
                ((= ch "{")
                  (begin (advance!) (tok-push! :lbrace nil) (scan!)))
                ((= ch "}")
                  (begin (advance!) (tok-push! :rbrace nil) (scan!)))
                ((= ch ";")
                  (begin (advance!) (tok-push! :semi nil) (scan!)))
                ((cur-sw? "←")
                  (begin (consume! "←") (tok-push! :assign nil) (scan!)))
                ((= ch ":")
                  ; ":" + identifier is a keyword; a lone ":" is :colon.
                  (let ((start pos))
                    (begin
                      (advance!)
                      (if (and (< pos src-len) (apl-alpha? (cur-byte)))
                        (begin
                          (read-ident-cont!)
                          (tok-push! :keyword (slice source start pos)))
                        (tok-push! :colon nil))
                      (scan!))))
                ((and (cur-sw? "¯")
                      (< (+ pos (len "¯")) src-len)
                      (apl-digit? (nth source (+ pos (len "¯")))))
                  (begin
                    (consume! "¯")
                    (let ((digits (read-digits! "")))
                      (tok-push! :num (- 0 (parse-int digits 0))))
                    (scan!)))
                ((apl-digit? ch)
                  (begin
                    (let ((digits (read-digits! "")))
                      (tok-push! :num (parse-int digits 0)))
                    (scan!)))
                ((= ch "'")
                  (begin
                    (advance!)
                    (let ((s (read-string! "")))
                      (tok-push! :str s))
                    (scan!)))
                ((or (apl-alpha? ch) (cur-sw? "⎕"))
                  (let ((start pos))
                    (begin
                      (if (cur-sw? "⎕") (consume! "⎕") (advance!))
                      (read-ident-cont!)
                      (tok-push! :name (slice source start pos))
                      (scan!))))
                (true
                  (let ((g (find-glyph)))
                    (if g
                      (begin (consume! g) (tok-push! :glyph g) (scan!))
                      ; Unrecognized input: drop one position and go on.
                      (begin (advance!) (scan!))))))))))

      (scan!)
      tokens)))
|
||||
@@ -1,41 +0,0 @@
|
||||
; Tcl parser — thin layer over tcl-tokenize
|
||||
; Adds tcl-parse entry point and word utility fns
|
||||
|
||||
; Entry point: parse Tcl source to a list of commands.
|
||||
; Returns same structure as tcl-tokenize.
|
||||
(define tcl-parse (fn (src) (tcl-tokenize src)))
|
||||
|
||||
; Predicate: can this word's value be read statically (no substitutions)?
;   - a "braced" word is always simple
;   - a "compound" word is simple when every part has type "text"
;   - any other word type is not simple
(define tcl-word-simple?
  (fn (word)
    (let ((kind (get word :type)))
      (cond
        ((= kind "braced") true)
        ((= kind "compound")
          (every? (fn (part) (= (get part :type) "text"))
                  (get word :parts)))
        (else false)))))
|
||||
|
||||
; Static string value of a word, or nil when it cannot be read statically.
;   - "braced" word: its :value, returned as-is
;   - "compound" word whose parts are all text: the part values joined
;   - anything else (substitutions present, other word types): nil
(define tcl-word-literal
  (fn (word)
    (let ((kind (get word :type)))
      (cond
        ((= kind "braced") (get word :value))
        ((and (= kind "compound") (tcl-word-simple? word))
          (join "" (map (fn (part) (get part :value)) (get word :parts))))
        (else nil)))))
|
||||
|
||||
; Number of words in a parsed command.
|
||||
(define tcl-cmd-len
|
||||
(fn (cmd) (len (get cmd :words))))
|
||||
|
||||
; Nth word literal from a command (index 0 = command name).
|
||||
; Returns nil if word has substitutions.
|
||||
(define tcl-nth-literal
|
||||
(fn (cmd n) (tcl-word-literal (nth (get cmd :words) n))))
|
||||
@@ -1,53 +0,0 @@
|
||||
#!/usr/bin/env bash
# Tcl-on-SX test runner — epoch protocol to sx_server.exe
#
# Usage: pass -v as the first argument to also print the raw server output.
# Environment: SX_SERVER overrides the path to the sx_server.exe binary.
# Exit status: 0 when no test failed, 1 on any failure or missing result.
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"

SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then echo "ERROR: sx_server.exe not found"; exit 1; fi

VERBOSE="${1:-}"
TMPFILE=$(mktemp)
# Single quotes defer expansion to exit time; inner quotes protect the path.
trap 'rm -f "$TMPFILE"' EXIT

# Epochs 1-3 load the sources; epoch 4 runs the tests and yields the result.
cat > "$TMPFILE" << 'EPOCHS'
(epoch 1)
(load "lib/tcl/tokenizer.sx")
(epoch 2)
(load "lib/tcl/parser.sx")
(epoch 3)
(load "lib/tcl/tests/parse.sx")
(epoch 4)
(eval "(tcl-run-parse-tests)")
EPOCHS

OUTPUT=$(timeout 30 "$SX_SERVER" < "$TMPFILE" 2>&1)
[ "$VERBOSE" = "-v" ] && echo "$OUTPUT"

# Result follows an (ok-len 4 N) line
RESULT=$(echo "$OUTPUT" | grep -A1 "^(ok-len 4 " | tail -1)
if [ -z "$RESULT" ]; then
  # Fallback: inline "(ok 4 <value>)" form; strip the matching prefix/suffix.
  RESULT=$(echo "$OUTPUT" | grep "^(ok 4 " | sed 's/^(ok 4 //' | sed 's/)$//')
fi
if [ -z "$RESULT" ]; then
  echo "ERROR: no result from epoch 4"
  echo "$OUTPUT" | tail -10
  exit 1
fi

PASSED=$(echo "$RESULT" | grep -o ':passed [0-9]*' | grep -o '[0-9]*$')
FAILED=$(echo "$RESULT" | grep -o ':failed [0-9]*' | grep -o '[0-9]*$')
# An unparsable result counts as one failure so the run cannot pass silently.
PASSED=${PASSED:-0}; FAILED=${FAILED:-1}
TOTAL=$((PASSED + FAILED))

if [ "$FAILED" = "0" ]; then
  echo "ok $PASSED/$TOTAL tcl-tokenize tests passed"
  exit 0
else
  echo "FAIL $PASSED/$TOTAL passed, $FAILED failed"
  echo "$RESULT"
  exit 1
fi
|
||||
@@ -1,186 +0,0 @@
|
||||
(define tcl-parse-pass 0)
|
||||
(define tcl-parse-fail 0)
|
||||
(define tcl-parse-failures (list))
|
||||
|
||||
; Compare `expected` with `actual` using = and record the outcome.
; A match increments tcl-parse-pass. A mismatch increments tcl-parse-fail
; and appends "<label>: expected=<e> got=<a>" to tcl-parse-failures.
(define tcl-assert
  (fn (label expected actual)
    (if (not (= expected actual))
      (let ((report (str label ": expected=" (str expected) " got=" (str actual))))
        (begin
          (set! tcl-parse-fail (+ 1 tcl-parse-fail))
          (append! tcl-parse-failures report)))
      (set! tcl-parse-pass (+ 1 tcl-parse-pass)))))
|
||||
|
||||
(define tcl-first-cmd
|
||||
(fn (src) (nth (tcl-tokenize src) 0)))
|
||||
|
||||
(define tcl-cmd-words
|
||||
(fn (src) (get (tcl-first-cmd src) :words)))
|
||||
|
||||
(define tcl-word
|
||||
(fn (src wi) (nth (tcl-cmd-words src) wi)))
|
||||
|
||||
(define tcl-parts
|
||||
(fn (src wi) (get (tcl-word src wi) :parts)))
|
||||
|
||||
(define tcl-part
|
||||
(fn (src wi pi) (nth (tcl-parts src wi) pi)))
|
||||
|
||||
(define tcl-run-parse-tests
|
||||
(fn ()
|
||||
(set! tcl-parse-pass 0)
|
||||
(set! tcl-parse-fail 0)
|
||||
(set! tcl-parse-failures (list))
|
||||
|
||||
; empty / whitespace-only
|
||||
(tcl-assert "empty" 0 (len (tcl-tokenize "")))
|
||||
(tcl-assert "ws-only" 0 (len (tcl-tokenize " ")))
|
||||
(tcl-assert "nl-only" 0 (len (tcl-tokenize "\n\n")))
|
||||
|
||||
; single command word count
|
||||
(tcl-assert "1word" 1 (len (tcl-cmd-words "set")))
|
||||
(tcl-assert "3words" 3 (len (tcl-cmd-words "set x 1")))
|
||||
(tcl-assert "4words" 4 (len (tcl-cmd-words "set a b c")))
|
||||
|
||||
; word type — bare word is compound
|
||||
(tcl-assert "bare-type" "compound" (get (tcl-word "set x 1" 0) :type))
|
||||
(tcl-assert "bare-quoted" false (get (tcl-word "set x 1" 0) :quoted))
|
||||
(tcl-assert "bare-part-type" "text" (get (tcl-part "set x 1" 0 0) :type))
|
||||
(tcl-assert "bare-part-val" "set" (get (tcl-part "set x 1" 0 0) :value))
|
||||
(tcl-assert "bare-part2-val" "x" (get (tcl-part "set x 1" 1 0) :value))
|
||||
(tcl-assert "bare-part3-val" "1" (get (tcl-part "set x 1" 2 0) :value))
|
||||
|
||||
; multiple commands
|
||||
(tcl-assert "semi-sep" 2 (len (tcl-tokenize "set x 1; set y 2")))
|
||||
(tcl-assert "nl-sep" 2 (len (tcl-tokenize "set x 1\nset y 2")))
|
||||
(tcl-assert "multi-nl" 3 (len (tcl-tokenize "a\nb\nc")))
|
||||
|
||||
; comments
|
||||
(tcl-assert "comment-only" 0 (len (tcl-tokenize "# comment")))
|
||||
(tcl-assert "comment-nl" 0 (len (tcl-tokenize "# comment\n")))
|
||||
(tcl-assert "comment-then-cmd" 1 (len (tcl-tokenize "# comment\nset x 1")))
|
||||
(tcl-assert "semi-then-comment" 1 (len (tcl-tokenize "set x 1; # comment")))
|
||||
|
||||
; brace-quoted words
|
||||
(tcl-assert "brace-type" "braced" (get (tcl-word "{hello}" 0) :type))
|
||||
(tcl-assert "brace-value" "hello" (get (tcl-word "{hello}" 0) :value))
|
||||
(tcl-assert "brace-spaces" "hello world" (get (tcl-word "{hello world}" 0) :value))
|
||||
(tcl-assert "brace-nested" "a {b} c" (get (tcl-word "{a {b} c}" 0) :value))
|
||||
(tcl-assert "brace-no-var-sub" "hello $x" (get (tcl-word "{hello $x}" 0) :value))
|
||||
(tcl-assert "brace-no-cmd-sub" "[expr 1]" (get (tcl-word "{[expr 1]}" 0) :value))
|
||||
|
||||
; double-quoted words
|
||||
(tcl-assert "dq-type" "compound" (get (tcl-word "\"hello\"" 0) :type))
|
||||
(tcl-assert "dq-quoted" true (get (tcl-word "\"hello\"" 0) :quoted))
|
||||
(tcl-assert "dq-literal" "hello" (get (tcl-part "\"hello\"" 0 0) :value))
|
||||
|
||||
; variable substitution in bare word
|
||||
(tcl-assert "var-type" "var" (get (tcl-part "$x" 0 0) :type))
|
||||
(tcl-assert "var-name" "x" (get (tcl-part "$x" 0 0) :name))
|
||||
(tcl-assert "var-long" "long_name" (get (tcl-part "$long_name" 0 0) :name))
|
||||
|
||||
; ${name} form
|
||||
(tcl-assert "var-brace-type" "var" (get (tcl-part "${x}" 0 0) :type))
|
||||
(tcl-assert "var-brace-name" "x" (get (tcl-part "${x}" 0 0) :name))
|
||||
|
||||
; array variable substitution
|
||||
(tcl-assert "arr-type" "var-arr" (get (tcl-part "$arr(key)" 0 0) :type))
|
||||
(tcl-assert "arr-name" "arr" (get (tcl-part "$arr(key)" 0 0) :name))
|
||||
(tcl-assert "arr-key-len" 1 (len (get (tcl-part "$arr(key)" 0 0) :key)))
|
||||
(tcl-assert "arr-key-text" "key"
|
||||
(get (nth (get (tcl-part "$arr(key)" 0 0) :key) 0) :value))
|
||||
|
||||
; command substitution
|
||||
(tcl-assert "cmd-type" "cmd" (get (tcl-part "[expr 1+1]" 0 0) :type))
|
||||
(tcl-assert "cmd-src" "expr 1+1" (get (tcl-part "[expr 1+1]" 0 0) :src))
|
||||
|
||||
; nested command substitution
|
||||
(tcl-assert "cmd-nested-src" "expr [string length x]"
|
||||
(get (tcl-part "[expr [string length x]]" 0 0) :src))
|
||||
|
||||
; backslash substitution in double-quoted word
|
||||
(let ((ps (tcl-parts "\"a\\nb\"" 0)))
|
||||
(begin
|
||||
(tcl-assert "bs-n-part0" "a" (get (nth ps 0) :value))
|
||||
(tcl-assert "bs-n-part1" "\n" (get (nth ps 1) :value))
|
||||
(tcl-assert "bs-n-part2" "b" (get (nth ps 2) :value))))
|
||||
|
||||
(let ((ps (tcl-parts "\"a\\tb\"" 0)))
|
||||
(tcl-assert "bs-t-part1" "\t" (get (nth ps 1) :value)))
|
||||
|
||||
(let ((ps (tcl-parts "\"a\\\\b\"" 0)))
|
||||
(tcl-assert "bs-bs-part1" "\\" (get (nth ps 1) :value)))
|
||||
|
||||
; mixed word: text + var + text in double-quoted
|
||||
(let ((ps (tcl-parts "\"hello $name!\"" 0)))
|
||||
(begin
|
||||
(tcl-assert "mixed-text0" "hello " (get (nth ps 0) :value))
|
||||
(tcl-assert "mixed-var1-type" "var" (get (nth ps 1) :type))
|
||||
(tcl-assert "mixed-var1-name" "name" (get (nth ps 1) :name))
|
||||
(tcl-assert "mixed-text2" "!" (get (nth ps 2) :value))))
|
||||
|
||||
; {*} expansion
|
||||
(tcl-assert "expand-type" "expand" (get (tcl-word "{*}$list" 0) :type))
|
||||
|
||||
; line continuation between words
|
||||
(tcl-assert "cont-words" 3 (len (tcl-cmd-words "set x \\\n 1")))
|
||||
|
||||
; continuation — third command word is correct
|
||||
(tcl-assert "cont-word2-val" "1"
|
||||
(get (tcl-part "set x \\\n 1" 2 0) :value))
|
||||
|
||||
|
||||
; --- parser helpers ---
|
||||
; tcl-parse is an alias for tcl-tokenize
|
||||
(tcl-assert "parse-cmd-count" 1 (len (tcl-parse "set x 1")))
|
||||
(tcl-assert "parse-2cmds" 2 (len (tcl-parse "set x 1; set y 2")))
|
||||
|
||||
; tcl-cmd-len
|
||||
(tcl-assert "cmd-len-3" 3 (tcl-cmd-len (nth (tcl-parse "set x 1") 0)))
|
||||
(tcl-assert "cmd-len-1" 1 (tcl-cmd-len (nth (tcl-parse "puts") 0)))
|
||||
|
||||
; tcl-word-simple? on braced word
|
||||
(tcl-assert "simple-braced" true
|
||||
(tcl-word-simple? (nth (get (nth (tcl-parse "{hello}") 0) :words) 0)))
|
||||
|
||||
; tcl-word-simple? on bare word with no subs
|
||||
(tcl-assert "simple-bare" true
|
||||
(tcl-word-simple? (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
|
||||
|
||||
; tcl-word-simple? on word containing a var sub — false
|
||||
(tcl-assert "simple-var-false" false
|
||||
(tcl-word-simple? (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
|
||||
|
||||
; tcl-word-simple? on word containing a cmd sub — false
|
||||
(tcl-assert "simple-cmd-false" false
|
||||
(tcl-word-simple? (nth (get (nth (tcl-parse "[expr 1]") 0) :words) 0)))
|
||||
|
||||
; tcl-word-literal on braced word
|
||||
(tcl-assert "lit-braced" "hello world"
|
||||
(tcl-word-literal (nth (get (nth (tcl-parse "{hello world}") 0) :words) 0)))
|
||||
|
||||
; tcl-word-literal on bare word
|
||||
(tcl-assert "lit-bare" "hello"
|
||||
(tcl-word-literal (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
|
||||
|
||||
; tcl-word-literal on word with var sub returns nil
|
||||
(tcl-assert "lit-var-nil" nil
|
||||
(tcl-word-literal (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
|
||||
|
||||
; tcl-nth-literal
|
||||
(tcl-assert "nth-lit-0" "set"
|
||||
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 0))
|
||||
(tcl-assert "nth-lit-1" "x"
|
||||
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 1))
|
||||
(tcl-assert "nth-lit-2" "1"
|
||||
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 2))
|
||||
|
||||
; tcl-nth-literal returns nil when word has subs
|
||||
(tcl-assert "nth-lit-nil" nil
|
||||
(tcl-nth-literal (nth (tcl-parse "set x $y") 0) 2))
|
||||
|
||||
|
||||
(dict
|
||||
"passed" tcl-parse-pass
|
||||
"failed" tcl-parse-fail
|
||||
"failures" tcl-parse-failures)))
|
||||
@@ -1,308 +0,0 @@
|
||||
(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r"))))
|
||||
|
||||
(define tcl-alpha?
|
||||
(fn (c)
|
||||
(and
|
||||
(not (= c nil))
|
||||
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
|
||||
|
||||
(define tcl-digit?
|
||||
(fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
|
||||
|
||||
(define tcl-ident-start?
|
||||
(fn (c) (or (tcl-alpha? c) (= c "_"))))
|
||||
|
||||
(define tcl-ident-char?
|
||||
(fn (c) (or (tcl-ident-start? c) (tcl-digit? c))))
|
||||
|
||||
(define tcl-tokenize
|
||||
(fn (src)
|
||||
(let ((pos 0) (src-len (len src)) (commands (list)))
|
||||
|
||||
(define char-at
|
||||
(fn (off)
|
||||
(if (< (+ pos off) src-len) (nth src (+ pos off)) nil)))
|
||||
|
||||
(define cur (fn () (char-at 0)))
|
||||
|
||||
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||
|
||||
(define skip-ws!
|
||||
(fn ()
|
||||
(when (tcl-ws? (cur))
|
||||
(begin (advance! 1) (skip-ws!)))))
|
||||
|
||||
(define skip-to-eol!
|
||||
(fn ()
|
||||
(when (and (< pos src-len) (not (= (cur) "\n")))
|
||||
(begin (advance! 1) (skip-to-eol!)))))
|
||||
|
||||
; Advance the cursor through brace-delimited content until the nesting
; depth `d` reaches 0 or the input ends. The caller has already consumed
; the opening brace and passes the current depth (1 for a fresh group).
(define skip-brace-content!
  (fn (d)
    (when (and (< pos src-len) (> d 0))
      (cond
        ; A backslash quotes the next character, so \{ and \} must not
        ; change the nesting depth. Skip both characters.
        ((= (cur) "\\")
          (begin
            (advance! 1)
            (when (< pos src-len) (advance! 1))
            (skip-brace-content! d)))
        ((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1))))
        ((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1))))
        (else (begin (advance! 1) (skip-brace-content! d)))))))
|
||||
|
||||
(define skip-dquote-content!
|
||||
(fn ()
|
||||
(when (and (< pos src-len) (not (= (cur) "\"")))
|
||||
(begin
|
||||
(when (= (cur) "\\") (advance! 1))
|
||||
(when (< pos src-len) (advance! 1))
|
||||
(skip-dquote-content!)))))
|
||||
|
||||
(define parse-bs
|
||||
(fn ()
|
||||
(advance! 1)
|
||||
(let ((c (cur)))
|
||||
(cond
|
||||
((= c nil) "\\")
|
||||
((= c "n") (begin (advance! 1) "\n"))
|
||||
((= c "t") (begin (advance! 1) "\t"))
|
||||
((= c "r") (begin (advance! 1) "\r"))
|
||||
((= c "\\") (begin (advance! 1) "\\"))
|
||||
((= c "[") (begin (advance! 1) "["))
|
||||
((= c "]") (begin (advance! 1) "]"))
|
||||
((= c "{") (begin (advance! 1) "{"))
|
||||
((= c "}") (begin (advance! 1) "}"))
|
||||
((= c "$") (begin (advance! 1) "$"))
|
||||
((= c ";") (begin (advance! 1) ";"))
|
||||
((= c "\"") (begin (advance! 1) "\""))
|
||||
((= c "'") (begin (advance! 1) "'"))
|
||||
((= c " ") (begin (advance! 1) " "))
|
||||
((= c "\n")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(skip-ws!)
|
||||
" "))
|
||||
(else (begin (advance! 1) (str "\\" c)))))))
|
||||
|
||||
(define parse-cmd-sub
|
||||
(fn ()
|
||||
(advance! 1)
|
||||
(let ((start pos) (depth 1))
|
||||
(define scan!
|
||||
(fn ()
|
||||
(when (and (< pos src-len) (> depth 0))
|
||||
(cond
|
||||
((= (cur) "[")
|
||||
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
|
||||
((= (cur) "]")
|
||||
(begin
|
||||
(set! depth (- depth 1))
|
||||
(when (> depth 0) (advance! 1))
|
||||
(scan!)))
|
||||
((= (cur) "{")
|
||||
(begin (advance! 1) (skip-brace-content! 1) (scan!)))
|
||||
((= (cur) "\"")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(skip-dquote-content!)
|
||||
(when (= (cur) "\"") (advance! 1))
|
||||
(scan!)))
|
||||
((= (cur) "\\")
|
||||
(begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!)))
|
||||
(else (begin (advance! 1) (scan!)))))))
|
||||
(scan!)
|
||||
(let ((src-text (slice src start pos)))
|
||||
(begin
|
||||
(when (= (cur) "]") (advance! 1))
|
||||
{:type "cmd" :src src-text})))))
|
||||
|
||||
(define scan-name!
|
||||
(fn ()
|
||||
(when (and (< pos src-len) (not (= (cur) "}")))
|
||||
(begin (advance! 1) (scan-name!)))))
|
||||
|
||||
(define scan-ns-name!
|
||||
(fn ()
|
||||
(cond
|
||||
((tcl-ident-char? (cur))
|
||||
(begin (advance! 1) (scan-ns-name!)))
|
||||
((and (= (cur) ":") (= (char-at 1) ":"))
|
||||
(begin (advance! 2) (scan-ns-name!)))
|
||||
(else nil))))
|
||||
|
||||
(define scan-klit!
|
||||
(fn ()
|
||||
(when (and (< pos src-len)
|
||||
(not (= (cur) ")"))
|
||||
(not (= (cur) "$"))
|
||||
(not (= (cur) "["))
|
||||
(not (= (cur) "\\")))
|
||||
(begin (advance! 1) (scan-klit!)))))
|
||||
|
||||
(define scan-key!
|
||||
(fn (kp)
|
||||
(when (and (< pos src-len) (not (= (cur) ")")))
|
||||
(cond
|
||||
((= (cur) "$")
|
||||
(begin (append! kp (parse-var-sub)) (scan-key! kp)))
|
||||
((= (cur) "[")
|
||||
(begin (append! kp (parse-cmd-sub)) (scan-key! kp)))
|
||||
((= (cur) "\\")
|
||||
(begin
|
||||
(append! kp {:type "text" :value (parse-bs)})
|
||||
(scan-key! kp)))
|
||||
(else
|
||||
(let ((kstart pos))
|
||||
(begin
|
||||
(scan-klit!)
|
||||
(append! kp {:type "text" :value (slice src kstart pos)})
|
||||
(scan-key! kp))))))))
|
||||
|
||||
(define parse-var-sub
|
||||
(fn ()
|
||||
(advance! 1)
|
||||
(cond
|
||||
((= (cur) "{")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(let ((start pos))
|
||||
(begin
|
||||
(scan-name!)
|
||||
(let ((name (slice src start pos)))
|
||||
(begin
|
||||
(when (= (cur) "}") (advance! 1))
|
||||
{:type "var" :name name}))))))
|
||||
((tcl-ident-start? (cur))
|
||||
(let ((start pos))
|
||||
(begin
|
||||
(scan-ns-name!)
|
||||
(let ((name (slice src start pos)))
|
||||
(if (= (cur) "(")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(let ((key-parts (list)))
|
||||
(begin
|
||||
(scan-key! key-parts)
|
||||
(when (= (cur) ")") (advance! 1))
|
||||
{:type "var-arr" :name name :key key-parts})))
|
||||
{:type "var" :name name})))))
|
||||
(else {:type "text" :value "$"}))))
|
||||
|
||||
(define scan-lit!
|
||||
(fn (stop?)
|
||||
(when (and (< pos src-len)
|
||||
(not (stop? (cur)))
|
||||
(not (= (cur) "$"))
|
||||
(not (= (cur) "["))
|
||||
(not (= (cur) "\\")))
|
||||
(begin (advance! 1) (scan-lit! stop?)))))
|
||||
|
||||
(define parse-word-parts!
|
||||
(fn (parts stop?)
|
||||
(when (and (< pos src-len) (not (stop? (cur))))
|
||||
(cond
|
||||
((= (cur) "$")
|
||||
(begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?)))
|
||||
((= (cur) "[")
|
||||
(begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?)))
|
||||
((= (cur) "\\")
|
||||
(begin
|
||||
(append! parts {:type "text" :value (parse-bs)})
|
||||
(parse-word-parts! parts stop?)))
|
||||
(else
|
||||
(let ((start pos))
|
||||
(begin
|
||||
(scan-lit! stop?)
|
||||
(when (> pos start)
|
||||
(append! parts {:type "text" :value (slice src start pos)}))
|
||||
(parse-word-parts! parts stop?))))))))
|
||||
|
||||
; Parse a {braced} word; the cursor is on the opening brace.
; Returns {:type "braced" :value <text>} where <text> is the raw source
; between the outer braces, with no substitutions applied (backslashes are
; kept verbatim). Nested braces are balanced. An unterminated word runs to
; the end of the input.
(define parse-brace-word
  (fn ()
    (advance! 1)
    (let ((depth 1) (start pos))
      (define scan!
        (fn ()
          (when (and (< pos src-len) (> depth 0))
            (cond
              ; A backslash quotes the next character, so \{ and \} do not
              ; count when locating the matching close brace.
              ((= (cur) "\\")
                (begin
                  (advance! 1)
                  (when (< pos src-len) (advance! 1))
                  (scan!)))
              ((= (cur) "{")
                (begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
              ((= (cur) "}")
                ; Leave the cursor ON the final close brace so the slice
                ; below excludes it.
                (begin
                  (set! depth (- depth 1))
                  (when (> depth 0) (advance! 1))
                  (scan!)))
              (else (begin (advance! 1) (scan!)))))))
      (scan!)
      (let ((value (slice src start pos)))
        (begin
          (when (= (cur) "}") (advance! 1))
          {:type "braced" :value value})))))
|
||||
|
||||
(define parse-dquote-word
|
||||
(fn ()
|
||||
(advance! 1)
|
||||
(let ((parts (list)))
|
||||
(begin
|
||||
(parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil))))
|
||||
(when (= (cur) "\"") (advance! 1))
|
||||
{:type "compound" :parts parts :quoted true}))))
|
||||
|
||||
(define parse-bare-word
|
||||
(fn ()
|
||||
(let ((parts (list)))
|
||||
(begin
|
||||
(parse-word-parts!
|
||||
parts
|
||||
(fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil))))
|
||||
{:type "compound" :parts parts :quoted false}))))
|
||||
|
||||
(define parse-word-no-expand
|
||||
(fn ()
|
||||
(cond
|
||||
((= (cur) "{") (parse-brace-word))
|
||||
((= (cur) "\"") (parse-dquote-word))
|
||||
(else (parse-bare-word)))))
|
||||
|
||||
(define parse-word
|
||||
(fn ()
|
||||
(cond
|
||||
((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}"))
|
||||
(begin
|
||||
(advance! 3)
|
||||
{:type "expand" :word (parse-word-no-expand)}))
|
||||
((= (cur) "{") (parse-brace-word))
|
||||
((= (cur) "\"") (parse-dquote-word))
|
||||
(else (parse-bare-word)))))
|
||||
|
||||
(define parse-words!
|
||||
(fn (words)
|
||||
(skip-ws!)
|
||||
(cond
|
||||
((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil)
|
||||
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
||||
(begin (advance! 2) (skip-ws!) (parse-words! words)))
|
||||
(else
|
||||
(begin
|
||||
(append! words (parse-word))
|
||||
(parse-words! words))))))
|
||||
|
||||
(define skip-seps!
|
||||
(fn ()
|
||||
(when (< pos src-len)
|
||||
(cond
|
||||
((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";"))
|
||||
(begin (advance! 1) (skip-seps!)))
|
||||
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
||||
(begin (advance! 2) (skip-seps!)))
|
||||
(else nil)))))
|
||||
|
||||
(define parse-all!
|
||||
(fn ()
|
||||
(skip-seps!)
|
||||
(when (< pos src-len)
|
||||
(cond
|
||||
((= (cur) "#")
|
||||
(begin (skip-to-eol!) (parse-all!)))
|
||||
(else
|
||||
(let ((words (list)))
|
||||
(begin
|
||||
(parse-words! words)
|
||||
(when (> (len words) 0)
|
||||
(append! commands {:type "command" :words words}))
|
||||
(parse-all!))))))))
|
||||
|
||||
(parse-all!)
|
||||
commands)))
|
||||
@@ -48,7 +48,7 @@ Core mapping:
|
||||
## Roadmap
|
||||
|
||||
### Phase 1 — tokenizer + parser
|
||||
- [ ] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ∨ ⍱ ⍲ , ⍪ ⍴ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ∪ ∩ ⍳ ⍸ ⌷ ⍋ ⍒ ⊥ ⊤ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
|
||||
- [x] Tokenizer: Unicode glyphs (the full APL set: `+ - × ÷ * ⍟ ⌈ ⌊ | ! ? ○ ~ < ≤ = ≥ > ≠ ∊ ∧ ∨ ⍱ ⍲ , ⍪ ⍴ ⌽ ⊖ ⍉ ↑ ↓ ⊂ ⊃ ⊆ ∪ ∩ ⍳ ⍸ ⌷ ⍋ ⍒ ⊥ ⊤ ⊣ ⊢ ⍎ ⍕ ⍝`), operators (`/ \ ¨ ⍨ ∘ . ⍣ ⍤ ⍥ @`), numbers (`¯` for negative, `1E2`, `1J2` complex deferred), characters (`'a'`, `''` escape), strands (juxtaposition of literals: `1 2 3`), names, comments `⍝ …`
|
||||
- [ ] Parser: right-to-left; classify each token as function, operator, value, or name; resolve valence positionally; dfn `{…}` body, tradfn `∇` header, guards `:`, control words `:If :While :For …` (Dyalog-style)
|
||||
- [ ] Unit tests in `lib/apl/tests/parse.sx`
|
||||
|
||||
@@ -108,7 +108,7 @@ Core mapping:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- _(none yet)_
|
||||
- 2026-04-25: tokenizer (Phase 1 step 1) — 46/46 tests green; Unicode-aware starts-with? scanner for multi-byte APL glyphs; `lib/apl/tokenizer.sx` + `lib/apl/tests/parse.sx`
|
||||
|
||||
## Blockers
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ Core mapping:
|
||||
## Roadmap
|
||||
|
||||
### Phase 1 — tokenizer + parser (the Dodekalogue)
|
||||
- [x] Tokenizer applying the 12 rules:
|
||||
- [ ] Tokenizer applying the 12 rules:
|
||||
1. Commands separated by `;` or newlines
|
||||
2. Words separated by whitespace within a command
|
||||
3. Double-quoted words: `\` escapes + `[…]` + `${…}` + `$var` substitution
|
||||
@@ -63,8 +63,8 @@ Core mapping:
|
||||
10. Order of substitution is left-to-right, single-pass
|
||||
11. Substitutions don't recurse — substituted text is not re-parsed
|
||||
12. The result of any substitution is the value, not a new script
|
||||
- [x] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
|
||||
- [x] Unit tests in `lib/tcl/tests/parse.sx`
|
||||
- [ ] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
|
||||
- [ ] Unit tests in `lib/tcl/tests/parse.sx`
|
||||
|
||||
### Phase 2 — sequential eval + core commands
|
||||
- [ ] `tcl-eval-script`: walk command list, dispatch each first-word into command table
|
||||
@@ -120,8 +120,7 @@ Core mapping:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-04-25: Phase 1 parser — `lib/tcl/parser.sx`, word-simple?/word-literal helpers, 67 tests green, commit 6ee05259
|
||||
- 2026-04-25: Phase 1 tokenizer (Dodekalogue) — `lib/tcl/tokenizer.sx`, 52 tests green, commit 666e29d5
|
||||
- _(none yet)_
|
||||
|
||||
## Blockers
|
||||
|
||||
|
||||
Reference in New Issue
Block a user