Files
mono/shared/sx/ref/parser.sx
giles a9526c4fa1 Update reference SX spec to match sx.js macros branch (CSSX, dict literals, new primitives)
- eval.sx: Add defstyle, defkeyframes, defhandler special forms; add ho-for-each
- parser.sx: Add dict {...} literal parsing and quasiquote/unquote sugar
- primitives.sx: Add parse-datetime, split-ids, css, merge-styles primitives
- render.sx: Add StyleValue handling, SVG filter elements, definition forms in render, fix render-to-html to handle HTML tags directly
- bootstrap_js.py: Add StyleValue type, buildKeyframes, isEvery platform helper, new primitives (format-date, parse-datetime, split-ids, css, merge-styles), dict/quasiquote parser, expose render functions as primitives
- sx-ref.js: Regenerated — 132/132 tests passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 10:17:28 +00:00

379 lines
13 KiB
Plaintext

;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Grammar:
;; program → expr*
;; expr → atom | list | quote-sugar
;; list → '(' expr* ')'
;; atom → string | number | keyword | symbol | boolean | nil
;; string → '"' (char | escape)* '"'
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;; keyword → ':' ident
;; symbol → ident
;; boolean → 'true' | 'false'
;; nil → 'nil'
;; ident → [a-zA-Z_~*+\-><=/!?&] [a-zA-Z0-9_~*+\-><=/!?.:&]*
;; comment → ';' to end of line (discarded)
;;
;; Dict literal:
;; {key val ...} → dict object (keys are keywords or expressions)
;;
;; Quote sugar:
;; `(expr) → (quasiquote expr)
;; ,(expr) → (unquote expr)
;; ,@(expr) → (splice-unquote expr)
;; ==========================================================================
;; --------------------------------------------------------------------------
;; Tokenizer
;; --------------------------------------------------------------------------
;; Produces a flat stream of tokens from source text.
;; Each token is a (type value line col) tuple.
(define tokenize
(fn (source)
(let ((pos 0)
(line 1)
(col 1)
(tokens (list))
(len-src (len source)))
;; Main loop — bootstrap compilers convert to while
(define scan-next
(fn ()
(when (< pos len-src)
(let ((ch (nth source pos)))
(cond
;; Whitespace — skip
(whitespace? ch)
(do (advance-pos!) (scan-next))
;; Comment — skip to end of line
(= ch ";")
(do (skip-to-eol!) (scan-next))
;; String
(= ch "\"")
(do (append! tokens (scan-string)) (scan-next))
;; Open paren
(= ch "(")
(do (append! tokens (list "lparen" "(" line col))
(advance-pos!)
(scan-next))
;; Close paren
(= ch ")")
(do (append! tokens (list "rparen" ")" line col))
(advance-pos!)
(scan-next))
;; Open bracket (list sugar)
(= ch "[")
(do (append! tokens (list "lbracket" "[" line col))
(advance-pos!)
(scan-next))
;; Close bracket
(= ch "]")
(do (append! tokens (list "rbracket" "]" line col))
(advance-pos!)
(scan-next))
;; Open brace (dict literal)
(= ch "{")
(do (append! tokens (list "lbrace" "{" line col))
(advance-pos!)
(scan-next))
;; Close brace
(= ch "}")
(do (append! tokens (list "rbrace" "}" line col))
(advance-pos!)
(scan-next))
;; Quasiquote sugar
(= ch "`")
(do (advance-pos!)
(let ((inner (scan-next-expr)))
(append! tokens (list "quasiquote" inner line col))
(scan-next)))
;; Unquote / splice-unquote
(= ch ",")
(do (advance-pos!)
(if (and (< pos len-src) (= (nth source pos) "@"))
(do (advance-pos!)
(let ((inner (scan-next-expr)))
(append! tokens (list "splice-unquote" inner line col))
(scan-next)))
(let ((inner (scan-next-expr)))
(append! tokens (list "unquote" inner line col))
(scan-next))))
;; Keyword
(= ch ":")
(do (append! tokens (scan-keyword)) (scan-next))
;; Number (or negative number)
(or (digit? ch)
(and (= ch "-") (< (inc pos) len-src)
(digit? (nth source (inc pos)))))
(do (append! tokens (scan-number)) (scan-next))
;; Symbol
(ident-start? ch)
(do (append! tokens (scan-symbol)) (scan-next))
;; Unknown — skip
:else
(do (advance-pos!) (scan-next)))))))
(scan-next)
tokens)))
;; --------------------------------------------------------------------------
;; Token scanners (pseudo-code — each target implements natively)
;; --------------------------------------------------------------------------
(define scan-string
(fn ()
;; Scan from opening " to closing ", handling escape sequences.
;; Returns ("string" value line col).
;; Escape sequences: \" \\ \n \t \r
(let ((start-line line)
(start-col col)
(result ""))
(advance-pos!) ;; skip opening "
(define scan-str-loop
(fn ()
(if (>= pos (len source))
(error "Unterminated string")
(let ((ch (nth source pos)))
(cond
(= ch "\"")
(do (advance-pos!) nil) ;; done
(= ch "\\")
(do (advance-pos!)
(let ((esc (nth source pos)))
(set! result (str result
(case esc
"n" "\n"
"t" "\t"
"r" "\r"
:else esc)))
(advance-pos!)
(scan-str-loop)))
:else
(do (set! result (str result ch))
(advance-pos!)
(scan-str-loop)))))))
(scan-str-loop)
(list "string" result start-line start-col))))
(define scan-keyword
(fn ()
;; Scan :identifier
(let ((start-line line) (start-col col))
(advance-pos!) ;; skip :
(let ((name (scan-ident-chars)))
(list "keyword" name start-line start-col)))))
(define scan-number
(fn ()
;; Scan integer or float literal
(let ((start-line line) (start-col col) (buf ""))
(when (= (nth source pos) "-")
(set! buf "-")
(advance-pos!))
;; Integer part
(define scan-digits
(fn ()
(when (and (< pos (len source)) (digit? (nth source pos)))
(set! buf (str buf (nth source pos)))
(advance-pos!)
(scan-digits))))
(scan-digits)
;; Decimal part
(when (and (< pos (len source)) (= (nth source pos) "."))
(set! buf (str buf "."))
(advance-pos!)
(scan-digits))
;; Exponent
(when (and (< pos (len source))
(or (= (nth source pos) "e") (= (nth source pos) "E")))
(set! buf (str buf (nth source pos)))
(advance-pos!)
(when (and (< pos (len source))
(or (= (nth source pos) "+") (= (nth source pos) "-")))
(set! buf (str buf (nth source pos)))
(advance-pos!))
(scan-digits))
(list "number" (parse-number buf) start-line start-col))))
(define scan-symbol
(fn ()
;; Scan identifier, check for true/false/nil
(let ((start-line line)
(start-col col)
(name (scan-ident-chars)))
(cond
(= name "true") (list "boolean" true start-line start-col)
(= name "false") (list "boolean" false start-line start-col)
(= name "nil") (list "nil" nil start-line start-col)
:else (list "symbol" name start-line start-col)))))
;; --------------------------------------------------------------------------
;; Parser — tokens → AST
;; --------------------------------------------------------------------------
(define parse
(fn (tokens)
;; Parse all top-level expressions from token stream.
(let ((pos 0)
(exprs (list)))
(define parse-loop
(fn ()
(when (< pos (len tokens))
(let ((result (parse-expr tokens)))
(append! exprs result)
(parse-loop)))))
(parse-loop)
exprs)))
(define parse-expr
(fn (tokens)
;; Parse a single expression.
(let ((tok (nth tokens pos)))
(case (first tok) ;; token type
"lparen"
(do (set! pos (inc pos))
(parse-list tokens "rparen"))
"lbracket"
(do (set! pos (inc pos))
(parse-list tokens "rbracket"))
"lbrace"
(do (set! pos (inc pos))
(parse-dict tokens))
"string" (do (set! pos (inc pos)) (nth tok 1))
"number" (do (set! pos (inc pos)) (nth tok 1))
"boolean" (do (set! pos (inc pos)) (nth tok 1))
"nil" (do (set! pos (inc pos)) nil)
"keyword"
(do (set! pos (inc pos))
(make-keyword (nth tok 1)))
"symbol"
(do (set! pos (inc pos))
(make-symbol (nth tok 1)))
:else (error (str "Unexpected token: " (inspect tok)))))))
(define parse-list
(fn (tokens close-type)
;; Parse expressions until close-type token.
(let ((items (list)))
(define parse-list-loop
(fn ()
(if (>= pos (len tokens))
(error "Unterminated list")
(if (= (first (nth tokens pos)) close-type)
(do (set! pos (inc pos)) nil) ;; done
(do (append! items (parse-expr tokens))
(parse-list-loop))))))
(parse-list-loop)
items)))
(define parse-dict
(fn (tokens)
;; Parse {key val key val ...} until "rbrace" token.
;; Returns a dict (plain object).
(let ((result (dict)))
(define parse-dict-loop
(fn ()
(if (>= pos (len tokens))
(error "Unterminated dict")
(if (= (first (nth tokens pos)) "rbrace")
(do (set! pos (inc pos)) nil) ;; done
(let ((key-expr (parse-expr tokens))
(key-str (if (= (type-of key-expr) "keyword")
(keyword-name key-expr)
(str key-expr)))
(val-expr (parse-expr tokens)))
(dict-set! result key-str val-expr)
(parse-dict-loop))))))
(parse-dict-loop)
result)))
;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------
(define serialize
(fn (val)
(case (type-of val)
"nil" "nil"
"boolean" (if val "true" "false")
"number" (str val)
"string" (str "\"" (escape-string val) "\"")
"symbol" (symbol-name val)
"keyword" (str ":" (keyword-name val))
"list" (str "(" (join " " (map serialize val)) ")")
"dict" (serialize-dict val)
"sx-expr" (sx-expr-source val)
:else (str val))))
(define serialize-dict
(fn (d)
(str "(dict "
(join " "
(reduce
(fn (acc key)
(concat acc (list (str ":" key) (serialize (dict-get d key)))))
(list)
(keys d)))
")")))
;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification:
;; (whitespace? ch) → boolean
;; (digit? ch) → boolean
;; (ident-start? ch) → boolean (letter, _, ~, *, +, -, etc.)
;; (ident-char? ch) → boolean (ident-start + digits, ., :)
;;
;; Constructors:
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (parse-number s) → number (int or float from string)
;;
;; String utilities:
;; (escape-string s) → string with " and \ escaped
;; (sx-expr-source e) → unwrap SxExpr to its source string
;;
;; Cursor state (mutable — each target manages its own way):
;; pos, line, col — current position in source
;; (advance-pos!) → increment pos, update line/col
;; (skip-to-eol!) → advance past end of line
;; (scan-ident-chars) → consume and return identifier string
;; --------------------------------------------------------------------------