Meta-circular evaluator: the SX language specifying its own semantics. A thin bootstrap compiler per target (JS, Python, Rust) reads these .sx files and emits a native evaluator. Files: - eval.sx: Core evaluator — type dispatch, special forms, TCO trampoline, lambda/component/macro invocation, higher-order forms - primitives.sx: Declarative specification of ~80 built-in pure functions - render.sx: Three rendering modes (DOM, HTML string, SX wire format) - parser.sx: Tokenizer, parser, and serializer specification Platform-specific concerns (DOM ops, async I/O, HTML emission) are declared as interfaces that each target implements. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
320 lines
10 KiB
Plaintext
320 lines
10 KiB
Plaintext
;; ==========================================================================
|
|
;; parser.sx — Reference SX parser specification
|
|
;;
|
|
;; Defines how SX source text is tokenized and parsed into AST.
|
|
;; The parser is intentionally simple — s-expressions need minimal parsing.
|
|
;;
|
|
;; Grammar:
|
|
;; program → expr*
|
|
;; expr → atom | list | quote-sugar
|
|
;; list → '(' expr* ')'
|
|
;; atom → string | number | keyword | symbol | boolean | nil
|
|
;; string → '"' (char | escape)* '"'
|
|
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
|
|
;; keyword → ':' ident
|
|
;; symbol → ident
|
|
;; boolean → 'true' | 'false'
|
|
;; nil → 'nil'
|
|
;; ident → [a-zA-Z_~*+\-><=/!?&] [a-zA-Z0-9_~*+\-><=/!?.:&]*
|
|
;; comment → ';' to end of line (discarded)
|
|
;;
|
|
;; Quote sugar (optional — not used in current SX; the tokenizer below has no branch for it):
|
|
;; '(expr) → (quote expr)
|
|
;; `(expr) → (quasiquote expr)
|
|
;; ~(expr) → (unquote expr)
|
|
;; ~@(expr) → (splice-unquote expr)
|
|
;; ==========================================================================
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Tokenizer
|
|
;; --------------------------------------------------------------------------
|
|
;; Produces a flat stream of tokens from source text.
|
|
;; Each token is a (type value line col) tuple.
|
|
|
|
(define tokenize
  (fn (source)
    ;; Turn SX source text into a flat list of (type value line col) tokens.
    ;; The character under the cursor selects which scanner (or single-char
    ;; token) applies; every branch moves the cursor forward, after which
    ;; the loop re-enters itself until pos reaches the end of source.
    (let ((pos 0)
          (line 1)
          (col 1)
          (tokens (list))
          (src-len (len source)))
      ;; Main loop — bootstrap compilers convert to while
      (define scan-next
        (fn ()
          (when (< pos src-len)
            (let ((c (nth source pos)))
              (cond
                ;; Whitespace — skip
                (whitespace? c)
                (advance-pos!)

                ;; Comment — skip to end of line
                (= c ";")
                (skip-to-eol!)

                ;; String
                (= c "\"")
                (append! tokens (scan-string))

                ;; Open paren
                (= c "(")
                (do (append! tokens (list "lparen" "(" line col))
                    (advance-pos!))

                ;; Close paren
                (= c ")")
                (do (append! tokens (list "rparen" ")" line col))
                    (advance-pos!))

                ;; Open bracket (list sugar)
                (= c "[")
                (do (append! tokens (list "lbracket" "[" line col))
                    (advance-pos!))

                ;; Close bracket
                (= c "]")
                (do (append! tokens (list "rbracket" "]" line col))
                    (advance-pos!))

                ;; Keyword
                (= c ":")
                (append! tokens (scan-keyword))

                ;; Number: a digit, or "-" immediately followed by a digit
                (or (digit? c)
                    (and (= c "-")
                         (< (inc pos) src-len)
                         (digit? (nth source (inc pos)))))
                (append! tokens (scan-number))

                ;; Symbol
                (ident-start? c)
                (append! tokens (scan-symbol))

                ;; Unknown — skip
                :else
                (advance-pos!))
              ;; Every branch above has advanced the cursor; continue.
              (scan-next)))))
      (scan-next)
      tokens)))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Token scanners (pseudo-code — each target implements natively)
|
|
;; --------------------------------------------------------------------------
|
|
|
|
(define scan-string
  (fn ()
    ;; Scan a string literal; the cursor must be on the opening ".
    ;; Returns ("string" value line col), where line/col are the cursor
    ;; position of the opening quote.
    ;; Escapes: \n \t \r map to newline, tab and carriage return; any other
    ;; escaped character (including \" and \\) stands for itself.
    ;; Raises "Unterminated string" when the source ends before the closing
    ;; ", including when it ends directly after a backslash.
    (let ((start-line line)
          (start-col col)
          (result ""))
      (advance-pos!) ;; skip opening "
      (define scan-str-loop
        (fn ()
          (if (>= pos (len source))
            (error "Unterminated string")
            (let ((ch (nth source pos)))
              (cond
                (= ch "\"")
                (do (advance-pos!) nil) ;; done
                (= ch "\\")
                (do (advance-pos!)
                    ;; A trailing backslash leaves nothing to escape; without
                    ;; this check (nth source pos) would index past the end
                    ;; of source.
                    (if (>= pos (len source))
                      (error "Unterminated string")
                      (let ((esc (nth source pos)))
                        (set! result (str result
                          (case esc
                            "n" "\n"
                            "t" "\t"
                            "r" "\r"
                            :else esc)))
                        (advance-pos!)
                        (scan-str-loop))))
                :else
                (do (set! result (str result ch))
                    (advance-pos!)
                    (scan-str-loop)))))))
      (scan-str-loop)
      (list "string" result start-line start-col))))
|
|
|
|
|
|
(define scan-keyword
  (fn ()
    ;; Scan a keyword; the cursor must be on the leading ":".
    ;; Returns ("keyword" name line col), where name is whatever
    ;; scan-ident-chars consumes after the colon and line/col are the
    ;; cursor position of the colon itself.
    (let ((kw-line line)
          (kw-col col))
      (advance-pos!) ;; step over the ":"
      (list "keyword" (scan-ident-chars) kw-line kw-col))))
|
|
|
|
|
|
(define scan-number
  (fn ()
    ;; Scan an integer or float literal starting at the cursor.
    ;; Accepts: '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
    ;; Returns ("number" value line col), where value is (parse-number buf)
    ;; and line/col are the cursor position of the first character.
    ;; A "." or exponent marker is consumed only when digits actually follow
    ;; it, so text such as "1." or "3e" yields the number scanned so far and
    ;; leaves the remaining characters for the tokenizer.
    (let ((start-line line) (start-col col) (buf ""))
      (when (= (nth source pos) "-")
        (set! buf "-")
        (advance-pos!))
      ;; Append consecutive digits under the cursor to buf.
      (define scan-digits
        (fn ()
          (when (and (< pos (len source)) (digit? (nth source pos)))
            (set! buf (str buf (nth source pos)))
            (advance-pos!)
            (scan-digits))))
      ;; Integer part
      (scan-digits)
      ;; Decimal part — requires a digit right after the "."
      (when (and (< (inc pos) (len source))
                 (= (nth source pos) ".")
                 (digit? (nth source (inc pos))))
        (set! buf (str buf "."))
        (advance-pos!)
        (scan-digits))
      ;; Exponent — look ahead past the optional sign and commit only if a
      ;; digit is found there.
      (when (and (< pos (len source))
                 (or (= (nth source pos) "e") (= (nth source pos) "E")))
        (let ((after-e (inc pos)))
          (let ((signed (and (< after-e (len source))
                             (or (= (nth source after-e) "+")
                                 (= (nth source after-e) "-")))))
            (let ((digit-at (if signed (inc after-e) after-e)))
              (when (and (< digit-at (len source))
                         (digit? (nth source digit-at)))
                ;; consume the e/E
                (set! buf (str buf (nth source pos)))
                (advance-pos!)
                ;; consume the sign, if one was seen
                (when signed
                  (set! buf (str buf (nth source pos)))
                  (advance-pos!))
                (scan-digits))))))
      (list "number" (parse-number buf) start-line start-col))))
|
|
|
|
|
|
(define scan-symbol
  (fn ()
    ;; Scan an identifier and classify it.
    ;; The reserved words true / false / nil become "boolean" or "nil"
    ;; tokens carrying the literal value; anything else is a "symbol" token
    ;; carrying the identifier text. line/col are captured before the
    ;; identifier is consumed.
    (let ((sym-line line)
          (sym-col col)
          (text (scan-ident-chars)))
      (case text
        "true" (list "boolean" true sym-line sym-col)
        "false" (list "boolean" false sym-line sym-col)
        "nil" (list "nil" nil sym-line sym-col)
        :else (list "symbol" text sym-line sym-col)))))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Parser — tokens → AST
|
|
;; --------------------------------------------------------------------------
|
|
|
|
(define parse
  (fn (tokens)
    ;; Parse every top-level expression in the token stream and return
    ;; them as a list, in source order.
    ;; pos is the token cursor; parse-expr is called until pos reaches the
    ;; end of tokens.
    (let ((pos 0)
          (forms (list)))
      (define collect-next
        (fn ()
          (when (< pos (len tokens))
            (append! forms (parse-expr tokens))
            (collect-next))))
      (collect-next)
      forms)))
|
|
|
|
|
|
(define parse-expr
  (fn (tokens)
    ;; Parse one expression starting at the token under the cursor (pos).
    ;; Opening delimiters hand off to parse-list with the matching close
    ;; type; atoms consume a single token. Any other token type raises
    ;; "Unexpected token: …" without moving the cursor.
    (let ((tok (nth tokens pos)))
      (let ((kind (first tok)))
        (cond
          (= kind "lparen")
          (do (set! pos (inc pos))
              (parse-list tokens "rparen"))

          (= kind "lbracket")
          (do (set! pos (inc pos))
              (parse-list tokens "rbracket"))

          ;; Literal tokens already carry their value in slot 1.
          (or (= kind "string") (= kind "number") (= kind "boolean"))
          (do (set! pos (inc pos))
              (nth tok 1))

          (= kind "nil")
          (do (set! pos (inc pos))
              nil)

          (= kind "keyword")
          (do (set! pos (inc pos))
              (make-keyword (nth tok 1)))

          (= kind "symbol")
          (do (set! pos (inc pos))
              (make-symbol (nth tok 1)))

          :else
          (error (str "Unexpected token: " (inspect tok))))))))
|
|
|
|
|
|
(define parse-list
  (fn (tokens close-type)
    ;; Collect expressions until a token of type close-type is reached,
    ;; consume that closing token, and return the collected items as a list.
    ;; Raises "Unterminated list" if the tokens run out first.
    (let ((elems (list)))
      (define collect
        (fn ()
          (cond
            ;; Ran off the end before seeing the closer.
            (>= pos (len tokens))
            (error "Unterminated list")

            ;; Closer found — step past it and stop.
            (= (first (nth tokens pos)) close-type)
            (do (set! pos (inc pos)) nil)

            ;; Otherwise parse one element and keep going.
            :else
            (do (append! elems (parse-expr tokens))
                (collect)))))
      (collect)
      elems)))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Serializer — AST → SX source text
|
|
;; --------------------------------------------------------------------------
|
|
|
|
(define serialize
  (fn (val)
    ;; Render a value as SX source text, dispatching on (type-of val).
    ;; Lists serialize their elements recursively, separated by single
    ;; spaces; dicts are delegated to serialize-dict; types not listed
    ;; here fall back to (str val).
    (let ((kind (type-of val)))
      (cond
        (= kind "nil") "nil"
        (= kind "boolean") (if val "true" "false")
        (= kind "number") (str val)
        (= kind "string") (str "\"" (escape-string val) "\"")
        (= kind "symbol") (symbol-name val)
        (= kind "keyword") (str ":" (keyword-name val))
        (= kind "list") (str "(" (join " " (map serialize val)) ")")
        (= kind "dict") (serialize-dict val)
        (= kind "sx-expr") (sx-expr-source val)
        :else (str val)))))
|
|
|
|
|
|
(define serialize-dict
  (fn (d)
    ;; Render a dict as "(dict :k1 v1 :k2 v2 …)".
    ;; Keys are visited in the order (keys d) returns them; each key is
    ;; written with a leading ":" and each value is passed through serialize.
    (let ((add-entry
            ;; Append ":key" and the serialized value for key to acc.
            (fn (acc key)
              (concat acc (list (str ":" key) (serialize (dict-get d key)))))))
      (let ((parts (reduce add-entry (list) (keys d))))
        (str "(dict " (join " " parts) ")")))))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Platform parser interface
|
|
;; --------------------------------------------------------------------------
|
|
;;
|
|
;; Character classification:
|
|
;; (whitespace? ch) → boolean
|
|
;; (digit? ch) → boolean
|
|
;; (ident-start? ch) → boolean (letter, _, ~, *, +, -, etc.)
|
|
;; (ident-char? ch) → boolean (ident-start + digits, ., :)
|
|
;;
|
|
;; Constructors:
|
|
;; (make-symbol name) → Symbol value
|
|
;; (make-keyword name) → Keyword value
|
|
;; (parse-number s) → number (int or float from string)
|
|
;;
|
|
;; String utilities:
|
|
;; (escape-string s) → string with " and \ escaped
|
|
;; (sx-expr-source e) → unwrap SxExpr to its source string
|
|
;;
|
|
;; Cursor state (mutable — each target manages its own way):
|
|
;; pos, line, col — current position in source
|
|
;; (advance-pos!) → increment pos, update line/col
|
|
;; (skip-to-eol!) → advance past end of line
|
|
;; (scan-ident-chars) → consume and return identifier string
|
|
;; --------------------------------------------------------------------------
|