diff --git a/lib/guest/lex.sx b/lib/guest/lex.sx new file mode 100644 index 00000000..5894dffa --- /dev/null +++ b/lib/guest/lex.sx @@ -0,0 +1,67 @@ +;; lib/guest/lex.sx — character-class predicates and token primitives shared +;; across guest tokenisers. +;; +;; All predicates are nil-safe — they accept nil (end-of-input) and return +;; false. This matches the convention used by the existing per-language +;; tokenisers (cur returns nil at EOF). +;; +;; Char classes +;; ------------ +;; lex-digit? — 0-9 +;; lex-hex-digit? — 0-9, a-f, A-F +;; lex-alpha? — a-z, A-Z (alias: lex-letter?) +;; lex-alnum? — alpha or digit +;; lex-ident-start? — alpha or underscore +;; lex-ident-char? — ident-start or digit +;; lex-space? — " ", "\t", "\r" (no newline) +;; lex-whitespace? — " ", "\t", "\r", "\n" (includes newline) +;; +;; Token record +;; ------------ +;; (lex-make-token TYPE VALUE POS) — {:type :value :pos} +;; (lex-make-token-spanning TYPE VALUE POS END) +;; — {:type :value :pos :end} +;; (lex-token-type TOK) +;; (lex-token-value TOK) +;; (lex-token-pos TOK) + +(define lex-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) + +(define + lex-hex-digit? + (fn + (c) + (and + (not (= c nil)) + (or + (lex-digit? c) + (and (>= c "a") (<= c "f")) + (and (>= c "A") (<= c "F")))))) + +(define + lex-alpha? + (fn + (c) + (and + (not (= c nil)) + (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) + +(define lex-letter? lex-alpha?) + +(define lex-alnum? (fn (c) (or (lex-alpha? c) (lex-digit? c)))) + +(define lex-ident-start? (fn (c) (or (lex-alpha? c) (= c "_")))) + +(define lex-ident-char? (fn (c) (or (lex-ident-start? c) (lex-digit? c)))) + +(define lex-space? (fn (c) (or (= c " ") (= c "\t") (= c "\r")))) + +(define lex-whitespace? (fn (c) (or (lex-space? c) (= c "\n")))) + +(define lex-make-token (fn (type value pos) {:pos pos :value value :type type})) + +(define lex-make-token-spanning (fn (type value pos end) {:pos pos :end end :value value :type type})) + +(define lex-token-type (fn (tok) (get tok :type))) +(define lex-token-value (fn (tok) (get tok :value))) +(define lex-token-pos (fn (tok) (get tok :pos))) diff --git a/lib/lua/test.sh b/lib/lua/test.sh index 719f3750..13dabffc 100755 --- a/lib/lua/test.sh +++ b/lib/lua/test.sh @@ -28,6 +28,8 @@ trap "rm -f $TMPFILE" EXIT cat > "$TMPFILE" << 'EPOCHS' (epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") (load "lib/lua/tokenizer.sx") (epoch 2) (load "lib/lua/parser.sx") diff --git a/lib/lua/tokenizer.sx b/lib/lua/tokenizer.sx index 6a09788d..32512705 100644 --- a/lib/lua/tokenizer.sx +++ b/lib/lua/tokenizer.sx @@ -1,31 +1,12 @@ -(define lua-make-token (fn (type value pos) {:pos pos :value value :type type})) +(prefix-rename "lua-" + '((make-token lex-make-token) + (digit? lex-digit?) + (hex-digit? lex-hex-digit?) + (letter? lex-alpha?) + (ident-start? lex-ident-start?) + (ident-char? lex-ident-char?) + (ws? lex-whitespace?))) -(define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) - -(define - lua-hex-digit? - (fn - (c) - (and - (not (= c nil)) - (or - (lua-digit? c) - (and (>= c "a") (<= c "f")) - (and (>= c "A") (<= c "F")))))) - -(define - lua-letter? - (fn - (c) - (and - (not (= c nil)) - (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) - -(define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_")))) - -(define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c)))) - -(define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) (define lua-keywords diff --git a/lib/tcl/conformance.sh b/lib/tcl/conformance.sh index 50d0f8d0..23ce41fb 100755 --- a/lib/tcl/conformance.sh +++ b/lib/tcl/conformance.sh @@ -63,6 +63,8 @@ for tcl_file in "${TCL_FILES[@]}"; do # Build epoch input using quoted heredoc for static parts; helper path via variable cat > "$tmpfile" << EPOCHS (epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") (load "lib/tcl/tokenizer.sx") (epoch 2) (load "lib/tcl/parser.sx") diff --git a/lib/tcl/test.sh b/lib/tcl/test.sh index 445db51d..8b85edde 100755 --- a/lib/tcl/test.sh +++ b/lib/tcl/test.sh @@ -33,6 +33,8 @@ HELPER_EOF cat > "$TMPFILE" << EPOCHS (epoch 1) +(load "lib/guest/lex.sx") +(load "lib/guest/prefix.sx") (load "lib/tcl/tokenizer.sx") (epoch 2) (load "lib/tcl/parser.sx") diff --git a/lib/tcl/tokenizer.sx b/lib/tcl/tokenizer.sx index 6ad455ac..bc094ff3 100644 --- a/lib/tcl/tokenizer.sx +++ b/lib/tcl/tokenizer.sx @@ -1,19 +1,10 @@ -(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r")))) +(prefix-rename "tcl-" + '((ws? lex-space?) + (alpha? lex-alpha?) + (digit? lex-digit?) + (ident-start? lex-ident-start?) + (ident-char? lex-ident-char?))) -(define tcl-alpha? - (fn (c) - (and - (not (= c nil)) - (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) - -(define tcl-digit? - (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) - -(define tcl-ident-start? - (fn (c) (or (tcl-alpha? c) (= c "_")))) - -(define tcl-ident-char? - (fn (c) (or (tcl-ident-start? c) (tcl-digit? c)))) (define tcl-tokenize (fn (src)