Files
rose-ash/shared/sx/ref/parser.sx
giles 235428628a Add reference SX evaluator written in s-expressions
Meta-circular evaluator: the SX language specifying its own semantics.
A thin bootstrap compiler per target (JS, Python, Rust) reads these
.sx files and emits a native evaluator.

Files:
- eval.sx: Core evaluator — type dispatch, special forms, TCO trampoline,
  lambda/component/macro invocation, higher-order forms
- primitives.sx: Declarative specification of ~80 built-in pure functions
- render.sx: Three rendering modes (DOM, HTML string, SX wire format)
- parser.sx: Tokenizer, parser, and serializer specification

Platform-specific concerns (DOM ops, async I/O, HTML emission) are
declared as interfaces that each target implements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 09:31:40 +00:00

320 lines
10 KiB
Plaintext

;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Grammar:
;; program → expr*
;; expr → atom | list | quote-sugar
;; list → '(' expr* ')'
;; atom → string | number | keyword | symbol | boolean | nil
;; string → '"' (char | escape)* '"'
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;; keyword → ':' ident
;; symbol → ident
;; boolean → 'true' | 'false'
;; nil → 'nil'
;; ident → [a-zA-Z_~*+\-><=/!?&] [a-zA-Z0-9_~*+\-><=/!?.:&]*
;; comment → ';' to end of line (discarded)
;;
;; Quote sugar (optional — not used in current SX):
;; '(expr) → (quote expr)
;; `(expr) → (quasiquote expr)
;; ~(expr) → (unquote expr)
;; ~@(expr) → (splice-unquote expr)
;; ==========================================================================
;; --------------------------------------------------------------------------
;; Tokenizer
;; --------------------------------------------------------------------------
;; Produces a flat stream of tokens from source text.
;; Each token is a (type value line col) tuple.
(define tokenize
(fn (source)
(let ((pos 0)
(line 1)
(col 1)
(tokens (list))
(len-src (len source)))
;; Main loop — bootstrap compilers convert to while
(define scan-next
(fn ()
(when (< pos len-src)
(let ((ch (nth source pos)))
(cond
;; Whitespace — skip
(whitespace? ch)
(do (advance-pos!) (scan-next))
;; Comment — skip to end of line
(= ch ";")
(do (skip-to-eol!) (scan-next))
;; String
(= ch "\"")
(do (append! tokens (scan-string)) (scan-next))
;; Open paren
(= ch "(")
(do (append! tokens (list "lparen" "(" line col))
(advance-pos!)
(scan-next))
;; Close paren
(= ch ")")
(do (append! tokens (list "rparen" ")" line col))
(advance-pos!)
(scan-next))
;; Open bracket (list sugar)
(= ch "[")
(do (append! tokens (list "lbracket" "[" line col))
(advance-pos!)
(scan-next))
;; Close bracket
(= ch "]")
(do (append! tokens (list "rbracket" "]" line col))
(advance-pos!)
(scan-next))
;; Keyword
(= ch ":")
(do (append! tokens (scan-keyword)) (scan-next))
;; Number (or negative number)
(or (digit? ch)
(and (= ch "-") (< (inc pos) len-src)
(digit? (nth source (inc pos)))))
(do (append! tokens (scan-number)) (scan-next))
;; Symbol
(ident-start? ch)
(do (append! tokens (scan-symbol)) (scan-next))
;; Unknown — skip
:else
(do (advance-pos!) (scan-next)))))))
(scan-next)
tokens)))
;; --------------------------------------------------------------------------
;; Token scanners (pseudo-code — each target implements natively)
;; --------------------------------------------------------------------------
(define scan-string
(fn ()
;; Scan from opening " to closing ", handling escape sequences.
;; Returns ("string" value line col).
;; Escape sequences: \" \\ \n \t \r
(let ((start-line line)
(start-col col)
(result ""))
(advance-pos!) ;; skip opening "
(define scan-str-loop
(fn ()
(if (>= pos (len source))
(error "Unterminated string")
(let ((ch (nth source pos)))
(cond
(= ch "\"")
(do (advance-pos!) nil) ;; done
(= ch "\\")
(do (advance-pos!)
(let ((esc (nth source pos)))
(set! result (str result
(case esc
"n" "\n"
"t" "\t"
"r" "\r"
:else esc)))
(advance-pos!)
(scan-str-loop)))
:else
(do (set! result (str result ch))
(advance-pos!)
(scan-str-loop)))))))
(scan-str-loop)
(list "string" result start-line start-col))))
(define scan-keyword
(fn ()
;; Scan :identifier
(let ((start-line line) (start-col col))
(advance-pos!) ;; skip :
(let ((name (scan-ident-chars)))
(list "keyword" name start-line start-col)))))
(define scan-number
(fn ()
;; Scan integer or float literal
(let ((start-line line) (start-col col) (buf ""))
(when (= (nth source pos) "-")
(set! buf "-")
(advance-pos!))
;; Integer part
(define scan-digits
(fn ()
(when (and (< pos (len source)) (digit? (nth source pos)))
(set! buf (str buf (nth source pos)))
(advance-pos!)
(scan-digits))))
(scan-digits)
;; Decimal part
(when (and (< pos (len source)) (= (nth source pos) "."))
(set! buf (str buf "."))
(advance-pos!)
(scan-digits))
;; Exponent
(when (and (< pos (len source))
(or (= (nth source pos) "e") (= (nth source pos) "E")))
(set! buf (str buf (nth source pos)))
(advance-pos!)
(when (and (< pos (len source))
(or (= (nth source pos) "+") (= (nth source pos) "-")))
(set! buf (str buf (nth source pos)))
(advance-pos!))
(scan-digits))
(list "number" (parse-number buf) start-line start-col))))
(define scan-symbol
(fn ()
;; Scan identifier, check for true/false/nil
(let ((start-line line)
(start-col col)
(name (scan-ident-chars)))
(cond
(= name "true") (list "boolean" true start-line start-col)
(= name "false") (list "boolean" false start-line start-col)
(= name "nil") (list "nil" nil start-line start-col)
:else (list "symbol" name start-line start-col)))))
;; --------------------------------------------------------------------------
;; Parser — tokens → AST
;; --------------------------------------------------------------------------
(define parse
(fn (tokens)
;; Parse all top-level expressions from token stream.
(let ((pos 0)
(exprs (list)))
(define parse-loop
(fn ()
(when (< pos (len tokens))
(let ((result (parse-expr tokens)))
(append! exprs result)
(parse-loop)))))
(parse-loop)
exprs)))
(define parse-expr
(fn (tokens)
;; Parse a single expression.
(let ((tok (nth tokens pos)))
(case (first tok) ;; token type
"lparen"
(do (set! pos (inc pos))
(parse-list tokens "rparen"))
"lbracket"
(do (set! pos (inc pos))
(parse-list tokens "rbracket"))
"string" (do (set! pos (inc pos)) (nth tok 1))
"number" (do (set! pos (inc pos)) (nth tok 1))
"boolean" (do (set! pos (inc pos)) (nth tok 1))
"nil" (do (set! pos (inc pos)) nil)
"keyword"
(do (set! pos (inc pos))
(make-keyword (nth tok 1)))
"symbol"
(do (set! pos (inc pos))
(make-symbol (nth tok 1)))
:else (error (str "Unexpected token: " (inspect tok)))))))
(define parse-list
(fn (tokens close-type)
;; Parse expressions until close-type token.
(let ((items (list)))
(define parse-list-loop
(fn ()
(if (>= pos (len tokens))
(error "Unterminated list")
(if (= (first (nth tokens pos)) close-type)
(do (set! pos (inc pos)) nil) ;; done
(do (append! items (parse-expr tokens))
(parse-list-loop))))))
(parse-list-loop)
items)))
;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------
(define serialize
(fn (val)
(case (type-of val)
"nil" "nil"
"boolean" (if val "true" "false")
"number" (str val)
"string" (str "\"" (escape-string val) "\"")
"symbol" (symbol-name val)
"keyword" (str ":" (keyword-name val))
"list" (str "(" (join " " (map serialize val)) ")")
"dict" (serialize-dict val)
"sx-expr" (sx-expr-source val)
:else (str val))))
(define serialize-dict
(fn (d)
(str "(dict "
(join " "
(reduce
(fn (acc key)
(concat acc (list (str ":" key) (serialize (dict-get d key)))))
(list)
(keys d)))
")")))
;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification:
;; (whitespace? ch) → boolean
;; (digit? ch) → boolean
;; (ident-start? ch) → boolean (letter, _, ~, *, +, -, etc.)
;; (ident-char? ch) → boolean (ident-start + digits, ., :)
;;
;; Constructors:
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (parse-number s) → number (int or float from string)
;;
;; String utilities:
;; (escape-string s) → string with " and \ escaped
;; (sx-expr-source e) → unwrap SxExpr to its source string
;;
;; Cursor state (mutable — each target manages its own way):
;; pos, line, col — current position in source
;; (advance-pos!) → increment pos, update line/col
;; (skip-to-eol!) → advance past end of line
;; (scan-ident-chars) → consume and return identifier string
;; --------------------------------------------------------------------------