Add reference SX evaluator written in s-expressions
Meta-circular evaluator: the SX language specifying its own semantics. A thin bootstrap compiler per target (JS, Python, Rust) reads these .sx files and emits a native evaluator.

Files:
- eval.sx: Core evaluator — type dispatch, special forms, TCO trampoline, lambda/component/macro invocation, higher-order forms
- primitives.sx: Declarative specification of ~80 built-in pure functions
- render.sx: Three rendering modes (DOM, HTML string, SX wire format)
- parser.sx: Tokenizer, parser, and serializer specification

Platform-specific concerns (DOM ops, async I/O, HTML emission) are declared as interfaces that each target implements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
319
shared/sx/ref/parser.sx
Normal file
319
shared/sx/ref/parser.sx
Normal file
@@ -0,0 +1,319 @@
|
||||
;; ==========================================================================
|
||||
;; parser.sx — Reference SX parser specification
|
||||
;;
|
||||
;; Defines how SX source text is tokenized and parsed into AST.
|
||||
;; The parser is intentionally simple — s-expressions need minimal parsing.
|
||||
;;
|
||||
;; Grammar:
|
||||
;; program → expr*
|
||||
;; expr → atom | list | quote-sugar
|
||||
;; list → '(' expr* ')' | '[' expr* ']'
|
||||
;; atom → string | number | keyword | symbol | boolean | nil
|
||||
;; string → '"' (char | escape)* '"'
|
||||
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
|
||||
;; keyword → ':' ident
|
||||
;; symbol → ident
|
||||
;; boolean → 'true' | 'false'
|
||||
;; nil → 'nil'
|
||||
;; ident → [a-zA-Z_~*+\-><=/!?&] [a-zA-Z0-9_~*+\-><=/!?.:&]*
|
||||
;; comment → ';' to end of line (discarded)
|
||||
;;
|
||||
;; Quote sugar (optional — not used in current SX, and not recognized by the tokenizer below):
|
||||
;; '(expr) → (quote expr)
|
||||
;; `(expr) → (quasiquote expr)
|
||||
;; ~(expr) → (unquote expr)
|
||||
;; ~@(expr) → (splice-unquote expr)
|
||||
;; ==========================================================================
|
||||
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Tokenizer
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Produces a flat stream of tokens from source text.
|
||||
;; Each token is a (type value line col) tuple.
|
||||
|
||||
(define tokenize
  (fn (source)
    ;; Turn SX source text into a flat list of tokens.
    ;; Every token is a list of the form (type value line col).
    ;; Multi-character tokens are delegated to the scan-* scanners; the
    ;; cursor (pos / line / col) is moved by advance-pos! and skip-to-eol!.
    (let ((pos 0)
          (line 1)
          (col 1)
          (tokens (list))
          (end-pos (len source)))

      ;; Emit a one-character delimiter token stamped with the current
      ;; line/col, then step over that character.
      (define emit-delim!
        (fn (kind text)
          (do (append! tokens (list kind text line col))
              (advance-pos!))))

      ;; Main loop — bootstrap compilers convert to while.
      ;; Each pass handles the character at pos, then recurses in tail position.
      (define scan-next
        (fn ()
          (when (< pos end-pos)
            (let ((c (nth source pos)))
              (cond
                ;; Whitespace — nothing to emit
                (whitespace? c)
                  (advance-pos!)

                ;; Comment — discard through end of line
                (= c ";")
                  (skip-to-eol!)

                ;; String literal
                (= c "\"")
                  (append! tokens (scan-string))

                ;; Parens, and brackets (list sugar)
                (= c "(") (emit-delim! "lparen" "(")
                (= c ")") (emit-delim! "rparen" ")")
                (= c "[") (emit-delim! "lbracket" "[")
                (= c "]") (emit-delim! "rbracket" "]")

                ;; Keyword
                (= c ":")
                  (append! tokens (scan-keyword))

                ;; Number: a digit, or "-" immediately followed by a digit.
                ;; Must be tested before the symbol branch.
                (or (digit? c)
                    (and (= c "-")
                         (< (inc pos) end-pos)
                         (digit? (nth source (inc pos)))))
                  (append! tokens (scan-number))

                ;; Symbol (scan-symbol also yields boolean / nil tokens)
                (ident-start? c)
                  (append! tokens (scan-symbol))

                ;; Any other character is dropped
                :else
                  (advance-pos!))
              ;; Every branch continues with the next character
              (scan-next)))))

      (scan-next)
      tokens)))
|
||||
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Token scanners (pseudo-code — each target implements natively)
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(define scan-string
  (fn ()
    ;; Scan a string literal starting at the opening " and ending just past
    ;; the closing ", decoding escape sequences along the way.
    ;; Returns ("string" value line col), where line/col are those of the
    ;; opening quote.
    ;; Escape sequences: \n \t \r map to newline / tab / carriage return;
    ;; any other escaped character (including \" and \\) stands for itself.
    ;; Raises "Unterminated string" when input ends before the closing quote,
    ;; including when input ends directly after a backslash.
    (let ((start-line line)
          (start-col col)
          (result ""))
      (advance-pos!) ;; skip opening "
      (define scan-str-loop
        (fn ()
          (if (>= pos (len source))
            (error "Unterminated string")
            (let ((ch (nth source pos)))
              (cond
                ;; Closing quote — consume it and stop
                (= ch "\"")
                  (do (advance-pos!) nil)

                ;; Escape — the backslash must be followed by a character
                (= ch "\\")
                  (do (advance-pos!)
                      (if (>= pos (len source))
                        ;; Input ended right after the backslash: report it
                        ;; instead of reading past the end of source.
                        (error "Unterminated string")
                        (let ((esc (nth source pos)))
                          (set! result (str result
                                            (case esc
                                              "n" "\n"
                                              "t" "\t"
                                              "r" "\r"
                                              :else esc)))
                          (advance-pos!)
                          (scan-str-loop))))

                ;; Ordinary character — copy it through
                :else
                  (do (set! result (str result ch))
                      (advance-pos!)
                      (scan-str-loop)))))))
      (scan-str-loop)
      (list "string" result start-line start-col))))
|
||||
|
||||
|
||||
(define scan-keyword
  (fn ()
    ;; Scan :identifier with the cursor on the ":".
    ;; Returns ("keyword" name line col); line/col are those of the ":" and
    ;; name is whatever scan-ident-chars consumes after it.
    (let ((tok-line line)
          (tok-col col))
      (advance-pos!) ;; step over the ":"
      (list "keyword" (scan-ident-chars) tok-line tok-col))))
|
||||
|
||||
|
||||
(define scan-number
  (fn ()
    ;; Scan an integer or float literal following the grammar
    ;;   number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
    ;; Returns ("number" value line col), where value is (parse-number text)
    ;; and line/col are those of the first character of the literal.
    ;; tokenize calls this with the cursor on a digit, or on "-" followed
    ;; by a digit.
    ;;
    ;; A "." or an exponent marker is consumed only when the digits it
    ;; requires actually follow, so input such as "1." or "2em" yields the
    ;; number and leaves the remaining characters for the tokenizer rather
    ;; than handing malformed text ("1.", "2e") to parse-number.
    (let ((start-line line)
          (start-col col)
          (buf "")
          (end-pos (len source)))

      ;; True when index i is inside source and holds a digit.
      (define digit-at?
        (fn (i)
          (and (< i end-pos) (digit? (nth source i)))))

      ;; Append the run of digits at the cursor to buf.
      (define scan-digits
        (fn ()
          (when (digit-at? pos)
            (set! buf (str buf (nth source pos)))
            (advance-pos!)
            (scan-digits))))

      ;; Optional leading minus
      (when (= (nth source pos) "-")
        (set! buf "-")
        (advance-pos!))

      ;; Integer part
      (scan-digits)

      ;; Decimal part — requires at least one digit after the "."
      (when (and (< pos end-pos)
                 (= (nth source pos) ".")
                 (digit-at? (inc pos)))
        (set! buf (str buf "."))
        (advance-pos!)
        (scan-digits))

      ;; Exponent — requires a digit after the marker and optional sign
      (when (and (< pos end-pos)
                 (or (= (nth source pos) "e") (= (nth source pos) "E")))
        (let ((after (inc pos)))
          (let ((signed? (and (< after end-pos)
                              (or (= (nth source after) "+")
                                  (= (nth source after) "-")))))
            (when (digit-at? (if signed? (inc after) after))
              ;; exponent marker
              (set! buf (str buf (nth source pos)))
              (advance-pos!)
              ;; optional sign
              (when signed?
                (set! buf (str buf (nth source pos)))
                (advance-pos!))
              (scan-digits)))))

      (list "number" (parse-number buf) start-line start-col))))
|
||||
|
||||
|
||||
(define scan-symbol
  (fn ()
    ;; Scan an identifier and classify it.
    ;; The reserved words true / false / nil become "boolean" / "nil"
    ;; tokens carrying the literal value; anything else is a "symbol"
    ;; token carrying the identifier text.
    ;; Returns (type value line col) with the position of the first character.
    (let ((tok-line line)
          (tok-col col)
          (text (scan-ident-chars)))
      (case text
        "true"  (list "boolean" true tok-line tok-col)
        "false" (list "boolean" false tok-line tok-col)
        "nil"   (list "nil" nil tok-line tok-col)
        :else   (list "symbol" text tok-line tok-col)))))
|
||||
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Parser — tokens → AST
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(define parse
  (fn (tokens)
    ;; Parse every top-level expression in the token stream and return
    ;; them as a list, in source order.
    ;; pos is the token cursor; parse-expr and parse-list advance it.
    (let ((pos 0)
          (forms (list)))
      (define parse-loop
        (fn ()
          (when (< pos (len tokens))
            (append! forms (parse-expr tokens))
            (parse-loop))))
      (parse-loop)
      forms)))
|
||||
|
||||
|
||||
(define parse-expr
  (fn (tokens)
    ;; Parse a single expression starting at the token under the cursor.
    ;; Consumes that token (and, for lists, everything through the matching
    ;; close token) and returns the resulting AST value.
    ;; Raises "Unexpected token: …" for any token type not listed below.
    (let ((tok (nth tokens pos)))
      (let ((kind (first tok)))
        (cond
          ;; Lists: parens and brackets differ only in their closing token
          (= kind "lparen")
            (do (set! pos (inc pos))
                (parse-list tokens "rparen"))

          (= kind "lbracket")
            (do (set! pos (inc pos))
                (parse-list tokens "rbracket"))

          ;; Literals whose token value is already the AST value
          (or (= kind "string") (= kind "number") (= kind "boolean"))
            (do (set! pos (inc pos))
                (nth tok 1))

          (= kind "nil")
            (do (set! pos (inc pos))
                nil)

          ;; Names are wrapped by the platform constructors
          (= kind "keyword")
            (do (set! pos (inc pos))
                (make-keyword (nth tok 1)))

          (= kind "symbol")
            (do (set! pos (inc pos))
                (make-symbol (nth tok 1)))

          :else
            (error (str "Unexpected token: " (inspect tok))))))))
|
||||
|
||||
|
||||
(define parse-list
  (fn (tokens close-type)
    ;; Collect expressions until a token of type close-type is reached.
    ;; The closing token is consumed; the collected items are returned.
    ;; Raises "Unterminated list" if the tokens run out first.
    (let ((items (list)))
      (define parse-list-loop
        (fn ()
          (cond
            ;; Ran off the end without seeing the closer
            (>= pos (len tokens))
              (error "Unterminated list")

            ;; Closer found — consume it and stop
            (= (first (nth tokens pos)) close-type)
              (do (set! pos (inc pos)) nil)

            ;; Otherwise parse one element and continue
            :else
              (do (append! items (parse-expr tokens))
                  (parse-list-loop)))))
      (parse-list-loop)
      items)))
|
||||
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Serializer — AST → SX source text
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(define serialize
  (fn (val)
    ;; Render an AST value as SX source text, dispatching on (type-of val).
    ;; Lists are serialized recursively and space-separated inside parens;
    ;; dicts go through serialize-dict; sx-expr values are emitted as their
    ;; stored source.
    (let ((kind (type-of val)))
      (cond
        (= kind "nil")     "nil"
        (= kind "boolean") (if val "true" "false")
        (= kind "string")  (str "\"" (escape-string val) "\"")
        (= kind "symbol")  (symbol-name val)
        (= kind "keyword") (str ":" (keyword-name val))
        (= kind "list")    (str "(" (join " " (map serialize val)) ")")
        (= kind "dict")    (serialize-dict val)
        (= kind "sx-expr") (sx-expr-source val)
        ;; Numbers and any other type are rendered with str
        :else              (str val)))))
|
||||
|
||||
|
||||
(define serialize-dict
  (fn (d)
    ;; Render a dict as "(dict :key value …)".
    ;; Walks (keys d) in the order given, emitting ":" + key followed by
    ;; the serialized value for each entry.
    (let ((parts (reduce
                   (fn (so-far k)
                     (concat so-far
                             (list (str ":" k)
                                   (serialize (dict-get d k)))))
                   (list)
                   (keys d))))
      (str "(dict " (join " " parts) ")"))))
|
||||
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; Platform parser interface
|
||||
;; --------------------------------------------------------------------------
|
||||
;;
|
||||
;; Character classification:
|
||||
;; (whitespace? ch) → boolean
|
||||
;; (digit? ch) → boolean
|
||||
;; (ident-start? ch) → boolean (letter, _, ~, *, +, -, etc.)
|
||||
;; (ident-char? ch) → boolean (ident-start + digits, ., :)
|
||||
;;
|
||||
;; Constructors:
|
||||
;; (make-symbol name) → Symbol value
|
||||
;; (make-keyword name) → Keyword value
|
||||
;; (parse-number s) → number (int or float from string)
|
||||
;;
|
||||
;; String utilities:
|
||||
;; (escape-string s) → string with " and \ escaped
|
||||
;; (sx-expr-source e) → unwrap SxExpr to its source string
|
||||
;;
|
||||
;; Cursor state (mutable — each target manages its own way):
|
||||
;; pos, line, col — current position in source
|
||||
;; (advance-pos!) → increment pos, update line/col
|
||||
;; (skip-to-eol!) → advance past end of line
|
||||
;; (scan-ident-chars) → consume and return identifier string
|
||||
;; --------------------------------------------------------------------------
|
||||
Reference in New Issue
Block a user