Recover agent-loop progress: lua/prolog/forth/erlang/haskell phases 1-2
Salvaged from worktree-agent-* branches killed during sx-tree MCP outage: - lua: tokenizer + parser + phase-2 transpile (~157 tests) - prolog: tokenizer + parser + unification (72 tests, plan update lost to WIP) - forth: phase-1 reader/interpreter + phase-2 colon/VARIABLE (134 tests) - erlang: tokenizer + parser (114 tests) - haskell: tokenizer + parse tests (43 tests) Cherry-picked file contents only, not branch history, to avoid pulling in unrelated ocaml-vm merge commits that were in those branches' bases.
This commit is contained in:
104
lib/haskell/test.sh
Executable file
104
lib/haskell/test.sh
Executable file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fast Haskell-on-SX test runner — pipes directly to sx_server.exe.
|
||||
# No MCP, no Docker. All tests live in lib/haskell/tests/*.sx and
|
||||
# produce a summary dict at the end of each file.
|
||||
#
|
||||
# Usage:
|
||||
# bash lib/haskell/test.sh # run all tests
|
||||
# bash lib/haskell/test.sh -v # verbose — show each file's pass/fail
|
||||
# bash lib/haskell/test.sh tests/parse.sx # run one file
|
||||
|
||||
set -euo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
# Fall back to the main-repo build if we're in a worktree.
|
||||
MAIN_ROOT=$(git worktree list | head -1 | awk '{print $1}')
|
||||
if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then
|
||||
SX_SERVER="$MAIN_ROOT/$SX_SERVER"
|
||||
else
|
||||
echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
VERBOSE=""
|
||||
FILES=()
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
-v|--verbose) VERBOSE=1 ;;
|
||||
*) FILES+=("$arg") ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ ${#FILES[@]} -eq 0 ]; then
|
||||
mapfile -t FILES < <(find lib/haskell/tests -maxdepth 2 -name '*.sx' | sort)
|
||||
fi
|
||||
|
||||
TOTAL_PASS=0
|
||||
TOTAL_FAIL=0
|
||||
FAILED_FILES=()
|
||||
|
||||
for FILE in "${FILES[@]}"; do
|
||||
[ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; }
|
||||
TMPFILE=$(mktemp)
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/haskell/tokenizer.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
(eval "(list hk-test-pass hk-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
|
||||
rm -f "$TMPFILE"
|
||||
|
||||
# Output format: either "(ok 3 (P F))" on one line (short result) or
|
||||
# "(ok-len 3 N)\n(P F)" where the value appears on the following line.
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}')
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 3 //; s/\)$//')
|
||||
fi
|
||||
if [ -z "$LINE" ]; then
|
||||
echo "✗ $FILE: could not extract summary"
|
||||
echo "$OUTPUT" | tail -20
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + 1))
|
||||
FAILED_FILES+=("$FILE")
|
||||
continue
|
||||
fi
|
||||
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
|
||||
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
|
||||
TOTAL_PASS=$((TOTAL_PASS + P))
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + F))
|
||||
if [ "$F" -gt 0 ]; then
|
||||
FAILED_FILES+=("$FILE")
|
||||
printf '✗ %-40s %d/%d\n' "$FILE" "$P" "$((P+F))"
|
||||
# Print failure names
|
||||
TMPFILE2=$(mktemp)
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/haskell/tokenizer.sx")
|
||||
(epoch 2)
|
||||
(load "$FILE")
|
||||
(epoch 3)
|
||||
(eval "(map (fn (f) (get f \"name\")) hk-test-fails)")
|
||||
EPOCHS
|
||||
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok 3 ' || true)
|
||||
rm -f "$TMPFILE2"
|
||||
echo " $FAILS"
|
||||
elif [ "$VERBOSE" = "1" ]; then
|
||||
printf '✓ %-40s %d passed\n' "$FILE" "$P"
|
||||
fi
|
||||
done
|
||||
|
||||
TOTAL=$((TOTAL_PASS + TOTAL_FAIL))
|
||||
if [ $TOTAL_FAIL -eq 0 ]; then
|
||||
echo "✓ $TOTAL_PASS/$TOTAL haskell-on-sx tests passed"
|
||||
else
|
||||
echo "✗ $TOTAL_PASS/$TOTAL passed, $TOTAL_FAIL failed in: ${FAILED_FILES[*]}"
|
||||
fi
|
||||
|
||||
[ $TOTAL_FAIL -eq 0 ]
|
||||
251
lib/haskell/tests/parse.sx
Normal file
251
lib/haskell/tests/parse.sx
Normal file
@@ -0,0 +1,251 @@
|
||||
;; Haskell parser / tokenizer tests.
|
||||
;;
|
||||
;; Lightweight runner: each test checks actual vs expected with
|
||||
;; structural (deep) equality and accumulates pass/fail counters.
|
||||
;; Final value of this file is a summary dict with :pass :fail :fails.
|
||||
|
||||
(define
|
||||
hk-deep=?
|
||||
(fn
|
||||
(a b)
|
||||
(cond
|
||||
((= a b) true)
|
||||
((and (dict? a) (dict? b))
|
||||
(let
|
||||
((ak (keys a)) (bk (keys b)))
|
||||
(if
|
||||
(not (= (len ak) (len bk)))
|
||||
false
|
||||
(every?
|
||||
(fn
|
||||
(k)
|
||||
(and (has-key? b k) (hk-deep=? (get a k) (get b k))))
|
||||
ak))))
|
||||
((and (list? a) (list? b))
|
||||
(if
|
||||
(not (= (len a) (len b)))
|
||||
false
|
||||
(let
|
||||
((i 0) (ok true))
|
||||
(define
|
||||
hk-de-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and ok (< i (len a)))
|
||||
(do
|
||||
(when
|
||||
(not (hk-deep=? (nth a i) (nth b i)))
|
||||
(set! ok false))
|
||||
(set! i (+ i 1))
|
||||
(hk-de-loop)))))
|
||||
(hk-de-loop)
|
||||
ok)))
|
||||
(:else false))))
|
||||
|
||||
(define hk-test-pass 0)
|
||||
(define hk-test-fail 0)
|
||||
(define hk-test-fails (list))
|
||||
|
||||
(define
|
||||
hk-test
|
||||
(fn
|
||||
(name actual expected)
|
||||
(if
|
||||
(hk-deep=? actual expected)
|
||||
(set! hk-test-pass (+ hk-test-pass 1))
|
||||
(do
|
||||
(set! hk-test-fail (+ hk-test-fail 1))
|
||||
(append! hk-test-fails {:actual actual :expected expected :name name})))))
|
||||
|
||||
;; Convenience: tokenize and drop newline + eof tokens so tests focus
|
||||
;; on meaningful content. Returns list of {:type :value} pairs.
|
||||
(define
|
||||
hk-toks
|
||||
(fn
|
||||
(src)
|
||||
(map
|
||||
(fn (tok) {:value (get tok "value") :type (get tok "type")})
|
||||
(filter
|
||||
(fn
|
||||
(tok)
|
||||
(let
|
||||
((ty (get tok "type")))
|
||||
(not (or (= ty "newline") (= ty "eof")))))
|
||||
(hk-tokenize src)))))
|
||||
|
||||
;; ── 1. Identifiers & reserved words ──
|
||||
(hk-test "varid simple" (hk-toks "foo") (list {:value "foo" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"varid with digits and prime"
|
||||
(hk-toks "foo123' bar2")
|
||||
(list {:value "foo123'" :type "varid"} {:value "bar2" :type "varid"}))
|
||||
|
||||
(hk-test "conid" (hk-toks "Maybe") (list {:value "Maybe" :type "conid"}))
|
||||
|
||||
(hk-test "reserved: where" (hk-toks "where") (list {:value "where" :type "reserved"}))
|
||||
|
||||
(hk-test
|
||||
"reserved: case of"
|
||||
(hk-toks "case of")
|
||||
(list {:value "case" :type "reserved"} {:value "of" :type "reserved"}))
|
||||
|
||||
(hk-test "underscore is reserved" (hk-toks "_") (list {:value "_" :type "reserved"}))
|
||||
|
||||
;; ── 2. Qualified names ──
|
||||
(hk-test "qvarid" (hk-toks "Data.Map.lookup") (list {:value "Data.Map.lookup" :type "qvarid"}))
|
||||
|
||||
(hk-test "qconid" (hk-toks "Data.Map") (list {:value "Data.Map" :type "qconid"}))
|
||||
|
||||
(hk-test "qualified operator" (hk-toks "Prelude.+") (list {:value "Prelude.+" :type "varsym"}))
|
||||
|
||||
;; ── 3. Numbers ──
|
||||
(hk-test "integer" (hk-toks "42") (list {:value 42 :type "integer"}))
|
||||
|
||||
(hk-test "hex" (hk-toks "0x2A") (list {:value 42 :type "integer"}))
|
||||
|
||||
(hk-test "octal" (hk-toks "0o17") (list {:value 15 :type "integer"}))
|
||||
|
||||
(hk-test "float" (hk-toks "3.14") (list {:value 3.14 :type "float"}))
|
||||
|
||||
(hk-test "float with exp" (hk-toks "1.5e-3") (list {:value 0.0015 :type "float"}))
|
||||
|
||||
;; ── 4. Strings / chars ──
|
||||
(hk-test "string" (hk-toks "\"hello\"") (list {:value "hello" :type "string"}))
|
||||
|
||||
(hk-test "char" (hk-toks "'a'") (list {:value "a" :type "char"}))
|
||||
|
||||
(hk-test "char escape newline" (hk-toks "'\\n'") (list {:value "\n" :type "char"}))
|
||||
|
||||
(hk-test "string escape" (hk-toks "\"a\\nb\"") (list {:value "a\nb" :type "string"}))
|
||||
|
||||
;; ── 5. Operators ──
|
||||
(hk-test "operator +" (hk-toks "+") (list {:value "+" :type "varsym"}))
|
||||
|
||||
(hk-test "operator >>=" (hk-toks ">>=") (list {:value ">>=" :type "varsym"}))
|
||||
|
||||
(hk-test "consym" (hk-toks ":+:") (list {:value ":+:" :type "consym"}))
|
||||
|
||||
(hk-test "reservedop ->" (hk-toks "->") (list {:value "->" :type "reservedop"}))
|
||||
|
||||
(hk-test "reservedop =>" (hk-toks "=>") (list {:value "=>" :type "reservedop"}))
|
||||
|
||||
(hk-test "reservedop .. (range)" (hk-toks "..") (list {:value ".." :type "reservedop"}))
|
||||
|
||||
(hk-test "reservedop backslash" (hk-toks "\\") (list {:value "\\" :type "reservedop"}))
|
||||
|
||||
;; ── 6. Punctuation ──
|
||||
(hk-test "parens" (hk-toks "( )") (list {:value "(" :type "lparen"} {:value ")" :type "rparen"}))
|
||||
|
||||
(hk-test "brackets" (hk-toks "[]") (list {:value "[" :type "lbracket"} {:value "]" :type "rbracket"}))
|
||||
|
||||
(hk-test "braces" (hk-toks "{}") (list {:value "{" :type "lbrace"} {:value "}" :type "rbrace"}))
|
||||
|
||||
(hk-test
|
||||
"backtick"
|
||||
(hk-toks "`mod`")
|
||||
(list {:value "`" :type "backtick"} {:value "mod" :type "varid"} {:value "`" :type "backtick"}))
|
||||
|
||||
(hk-test "comma and semi" (hk-toks ",;") (list {:value "," :type "comma"} {:value ";" :type "semi"}))
|
||||
|
||||
;; ── 7. Comments ──
|
||||
(hk-test "line comment stripped" (hk-toks "-- a comment") (list))
|
||||
|
||||
(hk-test "line comment before code" (hk-toks "-- c\nfoo") (list {:value "foo" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"block comment stripped"
|
||||
(hk-toks "{- block -} foo")
|
||||
(list {:value "foo" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"nested block comment"
|
||||
(hk-toks "{- {- nested -} -} x")
|
||||
(list {:value "x" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"-- inside operator is comment in Haskell"
|
||||
(hk-toks "-->")
|
||||
(list {:value "-->" :type "varsym"}))
|
||||
|
||||
;; ── 8. Mixed declarations ──
|
||||
(hk-test
|
||||
"type signature"
|
||||
(hk-toks "main :: IO ()")
|
||||
(list {:value "main" :type "varid"} {:value "::" :type "reservedop"} {:value "IO" :type "conid"} {:value "(" :type "lparen"} {:value ")" :type "rparen"}))
|
||||
|
||||
(hk-test
|
||||
"data declaration"
|
||||
(hk-toks "data Maybe a = Nothing | Just a")
|
||||
(list
|
||||
{:value "data" :type "reserved"}
|
||||
{:value "Maybe" :type "conid"}
|
||||
{:value "a" :type "varid"}
|
||||
{:value "=" :type "reservedop"}
|
||||
{:value "Nothing" :type "conid"}
|
||||
{:value "|" :type "reservedop"}
|
||||
{:value "Just" :type "conid"}
|
||||
{:value "a" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"lambda"
|
||||
(hk-toks "\\x -> x + 1")
|
||||
(list {:value "\\" :type "reservedop"} {:value "x" :type "varid"} {:value "->" :type "reservedop"} {:value "x" :type "varid"} {:value "+" :type "varsym"} {:value 1 :type "integer"}))
|
||||
|
||||
(hk-test
|
||||
"let expression"
|
||||
(hk-toks "let x = 1 in x + x")
|
||||
(list
|
||||
{:value "let" :type "reserved"}
|
||||
{:value "x" :type "varid"}
|
||||
{:value "=" :type "reservedop"}
|
||||
{:value 1 :type "integer"}
|
||||
{:value "in" :type "reserved"}
|
||||
{:value "x" :type "varid"}
|
||||
{:value "+" :type "varsym"}
|
||||
{:value "x" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"case expr"
|
||||
(hk-toks "case x of Just y -> y")
|
||||
(list
|
||||
{:value "case" :type "reserved"}
|
||||
{:value "x" :type "varid"}
|
||||
{:value "of" :type "reserved"}
|
||||
{:value "Just" :type "conid"}
|
||||
{:value "y" :type "varid"}
|
||||
{:value "->" :type "reservedop"}
|
||||
{:value "y" :type "varid"}))
|
||||
|
||||
(hk-test
|
||||
"list literal"
|
||||
(hk-toks "[1, 2, 3]")
|
||||
(list
|
||||
{:value "[" :type "lbracket"}
|
||||
{:value 1 :type "integer"}
|
||||
{:value "," :type "comma"}
|
||||
{:value 2 :type "integer"}
|
||||
{:value "," :type "comma"}
|
||||
{:value 3 :type "integer"}
|
||||
{:value "]" :type "rbracket"}))
|
||||
|
||||
(hk-test
|
||||
"range syntax"
|
||||
(hk-toks "[1..10]")
|
||||
(list {:value "[" :type "lbracket"} {:value 1 :type "integer"} {:value ".." :type "reservedop"} {:value 10 :type "integer"} {:value "]" :type "rbracket"}))
|
||||
|
||||
;; ── 9. Positions ──
|
||||
(hk-test
|
||||
"line/col positions"
|
||||
(let
|
||||
((toks (hk-tokenize "foo\n bar")))
|
||||
(list
|
||||
(get (nth toks 0) "line")
|
||||
(get (nth toks 0) "col")
|
||||
(get (nth toks 2) "line")
|
||||
(get (nth toks 2) "col")))
|
||||
(list 1 1 2 3))
|
||||
|
||||
;; ── Summary — final value of this file ──
|
||||
{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail}
|
||||
628
lib/haskell/tokenizer.sx
Normal file
628
lib/haskell/tokenizer.sx
Normal file
@@ -0,0 +1,628 @@
|
||||
;; Haskell tokenizer — produces a token stream from Haskell 98 source.
|
||||
;;
|
||||
;; Tokens: {:type T :value V :line L :col C}
|
||||
;;
|
||||
;; Types:
|
||||
;; "varid" lowercase ident, e.g. fmap, x, myFunc
|
||||
;; "conid" uppercase ident, e.g. Nothing, Just, Map
|
||||
;; "qvarid" qualified varid, value holds raw "A.B.foo"
|
||||
;; "qconid" qualified conid, e.g. "Data.Map"
|
||||
;; "reserved" reserved word — value is the word
|
||||
;; "varsym" operator symbol, e.g. +, ++, >>=
|
||||
;; "consym" constructor operator (starts with :), e.g. :, :+
|
||||
;; "reservedop" reserved operator ("::", "=", "->", "<-", "=>", "|", "\\", "@", "~", "..")
|
||||
;; "integer" integer literal (number)
|
||||
;; "float" float literal (number)
|
||||
;; "char" char literal (string of length 1)
|
||||
;; "string" string literal
|
||||
;; "lparen" "rparen" "lbracket" "rbracket" "lbrace" "rbrace"
|
||||
;; "vlbrace" "vrbrace" "vsemi" virtual layout tokens (inserted later)
|
||||
;; "comma" "semi" "backtick"
|
||||
;; "newline" a logical line break (used by layout pass; stripped afterwards)
|
||||
;; "eof"
|
||||
;;
|
||||
;; Note: SX `cond`/`when` clauses evaluate ONLY their last expression.
|
||||
;; Multi-expression bodies must be wrapped in (do ...). All helpers use
|
||||
;; the hk- prefix to avoid clashing with SX evaluator special forms.
|
||||
|
||||
;; ── Char-code table ───────────────────────────────────────────────
|
||||
(define
|
||||
hk-ord-table
|
||||
(let
|
||||
((t (dict)) (i 0))
|
||||
(define
|
||||
hk-build-table
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(< i 128)
|
||||
(do
|
||||
(dict-set! t (char-from-code i) i)
|
||||
(set! i (+ i 1))
|
||||
(hk-build-table)))))
|
||||
(hk-build-table)
|
||||
t))
|
||||
|
||||
(define hk-ord (fn (c) (or (get hk-ord-table c) 0)))
|
||||
|
||||
;; ── Character predicates ──────────────────────────────────────────
|
||||
(define
|
||||
hk-digit?
|
||||
(fn (c) (and (string? c) (>= (hk-ord c) 48) (<= (hk-ord c) 57))))
|
||||
|
||||
(define
|
||||
hk-hex-digit?
|
||||
(fn
|
||||
(c)
|
||||
(and
|
||||
(string? c)
|
||||
(or
|
||||
(and (>= (hk-ord c) 48) (<= (hk-ord c) 57))
|
||||
(and (>= (hk-ord c) 97) (<= (hk-ord c) 102))
|
||||
(and (>= (hk-ord c) 65) (<= (hk-ord c) 70))))))
|
||||
|
||||
(define
|
||||
hk-octal-digit?
|
||||
(fn (c) (and (string? c) (>= (hk-ord c) 48) (<= (hk-ord c) 55))))
|
||||
|
||||
(define
|
||||
hk-lower?
|
||||
(fn
|
||||
(c)
|
||||
(and
|
||||
(string? c)
|
||||
(or (and (>= (hk-ord c) 97) (<= (hk-ord c) 122)) (= c "_")))))
|
||||
|
||||
(define
|
||||
hk-upper?
|
||||
(fn (c) (and (string? c) (>= (hk-ord c) 65) (<= (hk-ord c) 90))))
|
||||
|
||||
(define hk-alpha? (fn (c) (or (hk-lower? c) (hk-upper? c))))
|
||||
|
||||
(define
|
||||
hk-ident-char?
|
||||
(fn (c) (or (hk-alpha? c) (hk-digit? c) (= c "'"))))
|
||||
|
||||
(define
|
||||
hk-symbol-char?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(= c "!")
|
||||
(= c "#")
|
||||
(= c "$")
|
||||
(= c "%")
|
||||
(= c "&")
|
||||
(= c "*")
|
||||
(= c "+")
|
||||
(= c ".")
|
||||
(= c "/")
|
||||
(= c "<")
|
||||
(= c "=")
|
||||
(= c ">")
|
||||
(= c "?")
|
||||
(= c "@")
|
||||
(= c "\\")
|
||||
(= c "^")
|
||||
(= c "|")
|
||||
(= c "-")
|
||||
(= c "~")
|
||||
(= c ":"))))
|
||||
|
||||
(define hk-space? (fn (c) (or (= c " ") (= c "\t"))))
|
||||
|
||||
;; ── Hex / oct parser (parse-int is decimal only) ──────────────────
|
||||
(define
|
||||
hk-parse-radix
|
||||
(fn
|
||||
(s radix)
|
||||
(let
|
||||
((n-len (len s)) (idx 0) (acc 0))
|
||||
(define
|
||||
hk-rad-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(< idx n-len)
|
||||
(do
|
||||
(let
|
||||
((c (substring s idx (+ idx 1))))
|
||||
(cond
|
||||
((and (>= (hk-ord c) 48) (<= (hk-ord c) 57))
|
||||
(set! acc (+ (* acc radix) (- (hk-ord c) 48))))
|
||||
((and (>= (hk-ord c) 97) (<= (hk-ord c) 102))
|
||||
(set! acc (+ (* acc radix) (+ 10 (- (hk-ord c) 97)))))
|
||||
((and (>= (hk-ord c) 65) (<= (hk-ord c) 70))
|
||||
(set! acc (+ (* acc radix) (+ 10 (- (hk-ord c) 65)))))))
|
||||
(set! idx (+ idx 1))
|
||||
(hk-rad-loop)))))
|
||||
(hk-rad-loop)
|
||||
acc)))
|
||||
|
||||
(define
|
||||
hk-parse-float
|
||||
(fn
|
||||
(s)
|
||||
(let
|
||||
((n-len (len s))
|
||||
(idx 0)
|
||||
(sign 1)
|
||||
(int-part 0)
|
||||
(frac-part 0)
|
||||
(frac-div 1)
|
||||
(exp-sign 1)
|
||||
(exp-val 0)
|
||||
(has-exp false))
|
||||
(when
|
||||
(and (< idx n-len) (= (substring s idx (+ idx 1)) "-"))
|
||||
(do (set! sign -1) (set! idx (+ idx 1))))
|
||||
(when
|
||||
(and (< idx n-len) (= (substring s idx (+ idx 1)) "+"))
|
||||
(set! idx (+ idx 1)))
|
||||
(define
|
||||
hk-int-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< idx n-len) (hk-digit? (substring s idx (+ idx 1))))
|
||||
(do
|
||||
(set!
|
||||
int-part
|
||||
(+ (* int-part 10) (parse-int (substring s idx (+ idx 1)))))
|
||||
(set! idx (+ idx 1))
|
||||
(hk-int-loop)))))
|
||||
(hk-int-loop)
|
||||
(when
|
||||
(and (< idx n-len) (= (substring s idx (+ idx 1)) "."))
|
||||
(do
|
||||
(set! idx (+ idx 1))
|
||||
(define
|
||||
hk-frac-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< idx n-len) (hk-digit? (substring s idx (+ idx 1))))
|
||||
(do
|
||||
(set! frac-div (* frac-div 10))
|
||||
(set!
|
||||
frac-part
|
||||
(+
|
||||
frac-part
|
||||
(/ (parse-int (substring s idx (+ idx 1))) frac-div)))
|
||||
(set! idx (+ idx 1))
|
||||
(hk-frac-loop)))))
|
||||
(hk-frac-loop)))
|
||||
(when
|
||||
(and
|
||||
(< idx n-len)
|
||||
(let
|
||||
((c (substring s idx (+ idx 1))))
|
||||
(or (= c "e") (= c "E"))))
|
||||
(do
|
||||
(set! has-exp true)
|
||||
(set! idx (+ idx 1))
|
||||
(cond
|
||||
((and (< idx n-len) (= (substring s idx (+ idx 1)) "-"))
|
||||
(do (set! exp-sign -1) (set! idx (+ idx 1))))
|
||||
((and (< idx n-len) (= (substring s idx (+ idx 1)) "+"))
|
||||
(set! idx (+ idx 1))))
|
||||
(define
|
||||
hk-exp-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< idx n-len) (hk-digit? (substring s idx (+ idx 1))))
|
||||
(do
|
||||
(set!
|
||||
exp-val
|
||||
(+
|
||||
(* exp-val 10)
|
||||
(parse-int (substring s idx (+ idx 1)))))
|
||||
(set! idx (+ idx 1))
|
||||
(hk-exp-loop)))))
|
||||
(hk-exp-loop)))
|
||||
(let
|
||||
((base (* sign (+ int-part frac-part))))
|
||||
(if has-exp (* base (pow 10 (* exp-sign exp-val))) base)))))
|
||||
|
||||
;; ── Reserved words / ops ──────────────────────────────────────────
|
||||
(define
|
||||
hk-reserved-words
|
||||
(list
|
||||
"case"
|
||||
"class"
|
||||
"data"
|
||||
"default"
|
||||
"deriving"
|
||||
"do"
|
||||
"else"
|
||||
"foreign"
|
||||
"if"
|
||||
"import"
|
||||
"in"
|
||||
"infix"
|
||||
"infixl"
|
||||
"infixr"
|
||||
"instance"
|
||||
"let"
|
||||
"module"
|
||||
"newtype"
|
||||
"of"
|
||||
"then"
|
||||
"type"
|
||||
"where"
|
||||
"_"))
|
||||
|
||||
(define hk-reserved? (fn (w) (contains? hk-reserved-words w)))
|
||||
|
||||
(define
|
||||
hk-reserved-ops
|
||||
(list ".." ":" "::" "=" "\\" "|" "<-" "->" "@" "~" "=>"))
|
||||
|
||||
(define hk-reserved-op? (fn (w) (contains? hk-reserved-ops w)))
|
||||
|
||||
;; ── Token constructor ─────────────────────────────────────────────
|
||||
(define hk-make-token (fn (type value line col) {:line line :value value :col col :type type}))
|
||||
|
||||
;; ── Main tokenizer ────────────────────────────────────────────────
|
||||
(define
|
||||
hk-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (line 1) (col 1) (src-len (len src)))
|
||||
(define
|
||||
hk-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if
|
||||
(< (+ pos offset) src-len)
|
||||
(substring src (+ pos offset) (+ pos offset 1))
|
||||
nil)))
|
||||
(define hk-cur (fn () (hk-peek 0)))
|
||||
(define
|
||||
hk-advance!
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((c (hk-cur)))
|
||||
(set! pos (+ pos 1))
|
||||
(if
|
||||
(= c "\n")
|
||||
(do (set! line (+ line 1)) (set! col 1))
|
||||
(set! col (+ col 1))))))
|
||||
(define
|
||||
hk-advance-n!
|
||||
(fn
|
||||
(n)
|
||||
(when (> n 0) (do (hk-advance!) (hk-advance-n! (- n 1))))))
|
||||
(define
|
||||
hk-push!
|
||||
(fn
|
||||
(type value tok-line tok-col)
|
||||
(append! tokens (hk-make-token type value tok-line tok-col))))
|
||||
(define
|
||||
hk-read-while
|
||||
(fn
|
||||
(pred)
|
||||
(let
|
||||
((start pos))
|
||||
(define
|
||||
hk-rw-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (pred (hk-cur)))
|
||||
(do (hk-advance!) (hk-rw-loop)))))
|
||||
(hk-rw-loop)
|
||||
(substring src start pos))))
|
||||
(define
|
||||
hk-skip-line-comment!
|
||||
(fn
|
||||
()
|
||||
(define
|
||||
hk-slc-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (not (= (hk-cur) "\n")))
|
||||
(do (hk-advance!) (hk-slc-loop)))))
|
||||
(hk-slc-loop)))
|
||||
(define
|
||||
hk-skip-block-comment!
|
||||
(fn
|
||||
()
|
||||
(hk-advance-n! 2)
|
||||
(let
|
||||
((depth 1))
|
||||
(define
|
||||
hk-sbc-loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((and (= (hk-cur) "{") (= (hk-peek 1) "-"))
|
||||
(do
|
||||
(hk-advance-n! 2)
|
||||
(set! depth (+ depth 1))
|
||||
(hk-sbc-loop)))
|
||||
((and (= (hk-cur) "-") (= (hk-peek 1) "}"))
|
||||
(do
|
||||
(hk-advance-n! 2)
|
||||
(set! depth (- depth 1))
|
||||
(when (> depth 0) (hk-sbc-loop))))
|
||||
(:else (do (hk-advance!) (hk-sbc-loop))))))
|
||||
(hk-sbc-loop))))
|
||||
(define
|
||||
hk-read-escape
|
||||
(fn
|
||||
()
|
||||
(hk-advance!)
|
||||
(let
|
||||
((c (hk-cur)))
|
||||
(cond
|
||||
((= c "n") (do (hk-advance!) "\n"))
|
||||
((= c "t") (do (hk-advance!) "\t"))
|
||||
((= c "r") (do (hk-advance!) "\r"))
|
||||
((= c "\\") (do (hk-advance!) "\\"))
|
||||
((= c "'") (do (hk-advance!) "'"))
|
||||
((= c "\"") (do (hk-advance!) "\""))
|
||||
((= c "0") (do (hk-advance!) (char-from-code 0)))
|
||||
((= c "a") (do (hk-advance!) (char-from-code 7)))
|
||||
((= c "b") (do (hk-advance!) (char-from-code 8)))
|
||||
((= c "f") (do (hk-advance!) (char-from-code 12)))
|
||||
((= c "v") (do (hk-advance!) (char-from-code 11)))
|
||||
((hk-digit? c)
|
||||
(let
|
||||
((digits (hk-read-while hk-digit?)))
|
||||
(char-from-code (parse-int digits))))
|
||||
(:else (do (hk-advance!) (str "\\" c)))))))
|
||||
(define
|
||||
hk-read-string
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((parts (list)))
|
||||
(hk-advance!)
|
||||
(define
|
||||
hk-rs-loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((= (hk-cur) "\"") (hk-advance!))
|
||||
((= (hk-cur) "\\")
|
||||
(do (append! parts (hk-read-escape)) (hk-rs-loop)))
|
||||
(:else
|
||||
(do
|
||||
(append! parts (hk-cur))
|
||||
(hk-advance!)
|
||||
(hk-rs-loop))))))
|
||||
(hk-rs-loop)
|
||||
(join "" parts))))
|
||||
(define
|
||||
hk-read-char-lit
|
||||
(fn
|
||||
()
|
||||
(hk-advance!)
|
||||
(let
|
||||
((c (if (= (hk-cur) "\\") (hk-read-escape) (let ((ch (hk-cur))) (hk-advance!) ch))))
|
||||
(when (= (hk-cur) "'") (hk-advance!))
|
||||
c)))
|
||||
(define
|
||||
hk-read-number
|
||||
(fn
|
||||
(tok-line tok-col)
|
||||
(let
|
||||
((start pos))
|
||||
(cond
|
||||
((and (= (hk-cur) "0") (or (= (hk-peek 1) "x") (= (hk-peek 1) "X")))
|
||||
(do
|
||||
(hk-advance-n! 2)
|
||||
(let
|
||||
((hex-start pos))
|
||||
(hk-read-while hk-hex-digit?)
|
||||
(hk-push!
|
||||
"integer"
|
||||
(hk-parse-radix (substring src hex-start pos) 16)
|
||||
tok-line
|
||||
tok-col))))
|
||||
((and (= (hk-cur) "0") (or (= (hk-peek 1) "o") (= (hk-peek 1) "O")))
|
||||
(do
|
||||
(hk-advance-n! 2)
|
||||
(let
|
||||
((oct-start pos))
|
||||
(hk-read-while hk-octal-digit?)
|
||||
(hk-push!
|
||||
"integer"
|
||||
(hk-parse-radix (substring src oct-start pos) 8)
|
||||
tok-line
|
||||
tok-col))))
|
||||
(:else
|
||||
(do
|
||||
(hk-read-while hk-digit?)
|
||||
(let
|
||||
((is-float false))
|
||||
(when
|
||||
(and (= (hk-cur) ".") (hk-digit? (hk-peek 1)))
|
||||
(do
|
||||
(set! is-float true)
|
||||
(hk-advance!)
|
||||
(hk-read-while hk-digit?)))
|
||||
(when
|
||||
(or (= (hk-cur) "e") (= (hk-cur) "E"))
|
||||
(do
|
||||
(set! is-float true)
|
||||
(hk-advance!)
|
||||
(when
|
||||
(or (= (hk-cur) "+") (= (hk-cur) "-"))
|
||||
(hk-advance!))
|
||||
(hk-read-while hk-digit?)))
|
||||
(let
|
||||
((num-str (substring src start pos)))
|
||||
(if
|
||||
is-float
|
||||
(hk-push!
|
||||
"float"
|
||||
(hk-parse-float num-str)
|
||||
tok-line
|
||||
tok-col)
|
||||
(hk-push!
|
||||
"integer"
|
||||
(parse-int num-str)
|
||||
tok-line
|
||||
tok-col))))))))))
|
||||
(define
|
||||
hk-read-qualified!
|
||||
(fn
|
||||
(tok-line tok-col)
|
||||
(let
|
||||
((parts (list)) (w (hk-read-while hk-ident-char?)))
|
||||
(append! parts w)
|
||||
(let
|
||||
((emitted false))
|
||||
(define
|
||||
hk-rq-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and
|
||||
(not emitted)
|
||||
(= (hk-cur) ".")
|
||||
(or
|
||||
(hk-upper? (hk-peek 1))
|
||||
(hk-lower? (hk-peek 1))
|
||||
(hk-symbol-char? (hk-peek 1))))
|
||||
(let
|
||||
((next (hk-peek 1)))
|
||||
(cond
|
||||
((hk-upper? next)
|
||||
(do
|
||||
(hk-advance!)
|
||||
(append! parts ".")
|
||||
(append! parts (hk-read-while hk-ident-char?))
|
||||
(hk-rq-loop)))
|
||||
((hk-lower? next)
|
||||
(do
|
||||
(hk-advance!)
|
||||
(set! emitted true)
|
||||
(hk-push!
|
||||
"qvarid"
|
||||
(str
|
||||
(join "" parts)
|
||||
"."
|
||||
(hk-read-while hk-ident-char?))
|
||||
tok-line
|
||||
tok-col)))
|
||||
((hk-symbol-char? next)
|
||||
(do
|
||||
(hk-advance!)
|
||||
(set! emitted true)
|
||||
(hk-push!
|
||||
"varsym"
|
||||
(str
|
||||
(join "" parts)
|
||||
"."
|
||||
(hk-read-while hk-symbol-char?))
|
||||
tok-line
|
||||
tok-col))))))))
|
||||
(hk-rq-loop)
|
||||
(when
|
||||
(not emitted)
|
||||
(let
|
||||
((full (join "" parts)))
|
||||
(if
|
||||
(string-contains? full ".")
|
||||
(hk-push! "qconid" full tok-line tok-col)
|
||||
(hk-push! "conid" full tok-line tok-col))))))))
|
||||
(define
|
||||
hk-scan!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((hk-space? (hk-cur)) (do (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) "\n")
|
||||
(do
|
||||
(let
|
||||
((l line) (c col))
|
||||
(hk-advance!)
|
||||
(hk-push! "newline" nil l c))
|
||||
(hk-scan!)))
|
||||
((and (= (hk-cur) "{") (= (hk-peek 1) "-"))
|
||||
(do (hk-skip-block-comment!) (hk-scan!)))
|
||||
((and (= (hk-cur) "-") (= (hk-peek 1) "-") (let ((p2 (hk-peek 2))) (or (nil? p2) (= p2 "\n") (not (hk-symbol-char? p2)))))
|
||||
(do (hk-skip-line-comment!) (hk-scan!)))
|
||||
((= (hk-cur) "\"")
|
||||
(do
|
||||
(let
|
||||
((l line) (c col))
|
||||
(hk-push! "string" (hk-read-string) l c))
|
||||
(hk-scan!)))
|
||||
((= (hk-cur) "'")
|
||||
(do
|
||||
(let
|
||||
((l line) (c col))
|
||||
(hk-push! "char" (hk-read-char-lit) l c))
|
||||
(hk-scan!)))
|
||||
((hk-digit? (hk-cur))
|
||||
(do (hk-read-number line col) (hk-scan!)))
|
||||
((hk-lower? (hk-cur))
|
||||
(do
|
||||
(let
|
||||
((l line) (c col))
|
||||
(let
|
||||
((w (hk-read-while hk-ident-char?)))
|
||||
(if
|
||||
(hk-reserved? w)
|
||||
(hk-push! "reserved" w l c)
|
||||
(hk-push! "varid" w l c))))
|
||||
(hk-scan!)))
|
||||
((hk-upper? (hk-cur))
|
||||
(do
|
||||
(let ((l line) (c col)) (hk-read-qualified! l c))
|
||||
(hk-scan!)))
|
||||
((= (hk-cur) "(")
|
||||
(do (hk-push! "lparen" "(" line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) ")")
|
||||
(do (hk-push! "rparen" ")" line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) "[")
|
||||
(do
|
||||
(hk-push! "lbracket" "[" line col)
|
||||
(hk-advance!)
|
||||
(hk-scan!)))
|
||||
((= (hk-cur) "]")
|
||||
(do
|
||||
(hk-push! "rbracket" "]" line col)
|
||||
(hk-advance!)
|
||||
(hk-scan!)))
|
||||
((= (hk-cur) "{")
|
||||
(do (hk-push! "lbrace" "{" line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) "}")
|
||||
(do (hk-push! "rbrace" "}" line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) ",")
|
||||
(do (hk-push! "comma" "," line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) ";")
|
||||
(do (hk-push! "semi" ";" line col) (hk-advance!) (hk-scan!)))
|
||||
((= (hk-cur) "`")
|
||||
(do
|
||||
(hk-push! "backtick" "`" line col)
|
||||
(hk-advance!)
|
||||
(hk-scan!)))
|
||||
((hk-symbol-char? (hk-cur))
|
||||
(do
|
||||
(let
|
||||
((l line) (c col))
|
||||
(let
|
||||
((first (hk-cur)))
|
||||
(let
|
||||
((w (hk-read-while hk-symbol-char?)))
|
||||
(cond
|
||||
((hk-reserved-op? w) (hk-push! "reservedop" w l c))
|
||||
((= first ":") (hk-push! "consym" w l c))
|
||||
(:else (hk-push! "varsym" w l c))))))
|
||||
(hk-scan!)))
|
||||
(:else (do (hk-advance!) (hk-scan!))))))
|
||||
(hk-scan!)
|
||||
(hk-push! "eof" nil line col)
|
||||
tokens)))
|
||||
Reference in New Issue
Block a user