;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Single-pass recursive descent: reads source text directly into AST,
;; no separate tokenization phase. All mutable cursor state lives inside
;; the parse closure.
;;
;; Grammar:
;;   program   → expr*
;;   expr      → atom | list | vector | map | quote-sugar
;;   list      → '(' expr* ')'
;;   vector    → '[' expr* ']'   (sugar for list)
;;   map       → '{' (key expr)* '}'
;;   atom      → string | number | rational | keyword | symbol | boolean | nil | char
;;   string    → '"' (char | escape)* '"'
;;   number    → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;;   rational  → integer '/' digit+
;;   keyword   → ':' ident
;;   symbol    → ident
;;   boolean   → 'true' | 'false'
;;   nil       → 'nil'
;;   char      → '#\' (ident | single-char)
;;   ident     → ident-start ident-char*
;;   comment   → ';' to end of line (discarded)
;;
;; Quote sugar:
;;   'expr   → (quote expr)
;;   `expr   → (quasiquote expr)
;;   ,expr   → (unquote expr)
;;   ,@expr  → (splice-unquote expr)
;;
;; Reader macros:
;;   #;expr        → datum comment (read and discard expr)
;;   #|raw chars|  → raw string literal (no escape processing)
;;   #'expr        → (quote expr)
;;   #\a           → character literal (char value)
;;   #\space       → named character (space = 32)
;;   #name expr    → extensible dispatch (calls registered handler)
;;
;; Platform interface (each target implements natively):
;;   (ident-start? ch)    → boolean
;;   (ident-char? ch)     → boolean
;;   (make-symbol name)   → Symbol value
;;   (make-keyword name)  → Keyword value
;;   (escape-string s)    → string with " and \ escaped for serialization
;;   (make-char n)        → Char value from Unicode codepoint
;;   (make-rational n d)  → Rational value (auto-reduced by GCD)
;;   (char->integer c)    → Unicode codepoint of char c
;;   (char-from-code n)   → single-char string from codepoint
;;   (char-code s)        → codepoint of first char in string s
;; ==========================================================================


;; --------------------------------------------------------------------------
;; Parser — single-pass recursive descent
;; --------------------------------------------------------------------------
;; Returns a list of top-level AST expressions.

;; Parse SX source string into AST.
;;
;;   source → the complete SX program text
;;   result → list of the top-level expressions, in source order
;;
;; Raises (via `error`) on unterminated strings/lists/maps/raw strings,
;; malformed \u escapes, unknown reader macros and unexpected characters.
;;
;; `pos` is the single mutable cursor into `source`; every helper below is a
;; closure that reads and advances it.
(define sx-parse :effects ()
  (fn ((source :as string))
    (let ((pos 0)
          (len-src (len source)))

      ;; Advance the cursor up to (not past) the next "\n" or end of input.
      (define skip-comment :effects ()
        (fn ()
          (when (and (< pos len-src) (not (= (nth source pos) "\n")))
            (set! pos (inc pos))
            (skip-comment))))

      ;; Skip everything that produces no datum: whitespace, `;` line
      ;; comments and `#;` datum comments.
      ;;
      ;; Datum comments are handled here rather than in read-expr so that the
      ;; list, map and top-level loops (which all call skip-ws before testing
      ;; for a closer / end of input) see `(a #;b)` as `(a)`. Handling them in
      ;; read-expr forced a second read after the discarded datum, which
      ;; failed whenever the datum comment was the last thing before a closing
      ;; delimiter or the end of the source.
      (define skip-ws :effects ()
        (fn ()
          (when (< pos len-src)
            (let ((ch (nth source pos)))
              (cond
                (or (= ch " ") (= ch "\t") (= ch "\n") (= ch "\r"))
                  (do (set! pos (inc pos)) (skip-ws))
                (= ch ";")
                  (do (set! pos (inc pos)) (skip-comment) (skip-ws))
                (and (= ch "#")
                     (< (inc pos) len-src)
                     (= (nth source (inc pos)) ";"))
                  ;; Consume "#;", read one datum and drop it. A "#;" with no
                  ;; datum after it still raises from read-expr.
                  (do (set! pos (+ pos 2)) (read-expr) (skip-ws))
                :else nil)))))

      ;; Position of `ch` (lower-cased) within "0123456789abcdef".
      ;; Callers must check hex-digit? first.
      (define hex-digit-value :effects ()
        (fn (ch)
          (index-of "0123456789abcdef" (lower ch))))

      ;; True when `ch` is one of 0-9, a-f or A-F.
      (define hex-digit? :effects ()
        (fn (ch)
          (let ((c (lower ch)))
            (or (and (>= c "0") (<= c "9"))
                (and (>= c "a") (<= c "f"))))))

      ;; Read a double-quoted string. On entry the cursor is on the opening
      ;; quote; on exit it is just past the closing quote. Returns the decoded
      ;; contents. Escapes: \n \t \r, \uXXXX (exactly four hex digits), and
      ;; any other escaped character stands for itself.
      (define read-string :effects ()
        (fn ()
          (set! pos (inc pos))
          (let ((buf ""))

            ;; Consume one hex digit of a \u escape and return its value.
            ;; Fails loudly instead of indexing past the end of the source or
            ;; folding a non-hex character into the codepoint.
            (define read-hex-digit :effects ()
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated string")
                  (let ((ch (nth source pos)))
                    (if (hex-digit? ch)
                      (do (set! pos (inc pos)) (hex-digit-value ch))
                      (error (str "Invalid \\u escape: expected hex digit, got " ch)))))))

            (define read-str-loop :effects ()
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated string")
                  (let ((ch (nth source pos)))
                    (cond
                      (= ch "\"")
                        (do (set! pos (inc pos)) nil)
                      (= ch "\\")
                        (do (set! pos (inc pos))
                            ;; A backslash as the very last character has
                            ;; nothing to escape.
                            (if (>= pos len-src)
                              (error "Unterminated string")
                              (let ((esc (nth source pos)))
                                (if (= esc "u")
                                  (do (set! pos (inc pos))
                                      (let ((d0 (read-hex-digit))
                                            (d1 (read-hex-digit))
                                            (d2 (read-hex-digit))
                                            (d3 (read-hex-digit)))
                                        (set! buf (str buf (char-from-code (+ (* d0 4096) (* d1 256) (* d2 16) d3))))
                                        (read-str-loop)))
                                  (do (set! buf (str buf (cond
                                                           (= esc "n") "\n"
                                                           (= esc "t") "\t"
                                                           (= esc "r") "\r"
                                                           :else esc)))
                                      (set! pos (inc pos))
                                      (read-str-loop))))))
                      :else
                        (do (set! buf (str buf ch))
                            (set! pos (inc pos))
                            (read-str-loop)))))))

            (read-str-loop)
            buf)))

      ;; Consume a maximal run of ident-char? characters starting at the
      ;; cursor and return it as a string (possibly empty).
      (define read-ident :effects ()
        (fn ()
          (let ((start pos))
            (define read-ident-loop :effects ()
              (fn ()
                (when (and (< pos len-src) (ident-char? (nth source pos)))
                  (set! pos (inc pos))
                  (read-ident-loop))))
            (read-ident-loop)
            (slice source start pos))))

      ;; Cursor is on ':'. Skip it and build a keyword from the identifier
      ;; that follows.
      (define read-keyword :effects ()
        (fn ()
          (set! pos (inc pos))
          (make-keyword (read-ident))))

      ;; Read an integer, decimal, exponent-form number or rational.
      ;; The matched source slice is handed to parse-number; rationals are
      ;; built with make-rational from the numerator and denominator slices.
      (define read-number :effects ()
        (fn ()
          (let ((start pos))
            ;; Optional leading minus sign.
            (when (and (< pos len-src) (= (nth source pos) "-"))
              (set! pos (inc pos)))

            ;; Advance over a (possibly empty) run of decimal digits.
            (define read-digits :effects ()
              (fn ()
                (when (and (< pos len-src)
                           (let ((c (nth source pos)))
                             (and (>= c "0") (<= c "9"))))
                  (set! pos (inc pos))
                  (read-digits))))

            (read-digits)
            ;; "/" counts as a rational separator only when a digit follows.
            (if (and (< pos len-src)
                     (= (nth source pos) "/")
                     (< (inc pos) len-src)
                     (let ((nc (nth source (inc pos))))
                       (and (>= nc "0") (<= nc "9"))))
              (let ((numer (parse-number (slice source start pos))))
                (set! pos (inc pos))
                (let ((denom-start pos))
                  (read-digits)
                  (make-rational numer (parse-number (slice source denom-start pos)))))
              (do
                ;; Optional fractional part.
                (when (and (< pos len-src) (= (nth source pos) "."))
                  (set! pos (inc pos))
                  (read-digits))
                ;; Optional exponent with optional sign.
                (when (and (< pos len-src)
                           (or (= (nth source pos) "e") (= (nth source pos) "E")))
                  (set! pos (inc pos))
                  (when (and (< pos len-src)
                             (or (= (nth source pos) "+") (= (nth source pos) "-")))
                    (set! pos (inc pos)))
                  (read-digits))
                (parse-number (slice source start pos)))))))

      ;; Read an identifier; "true", "false" and "nil" become the literal
      ;; values, anything else becomes a symbol.
      (define read-symbol :effects ()
        (fn ()
          (let ((name (read-ident)))
            (cond
              (= name "true") true
              (= name "false") false
              (= name "nil") nil
              :else (make-symbol name)))))

      ;; Read expressions up to `close-ch` (the opener has already been
      ;; consumed) and return them as a list. Used for both (...) and [...].
      (define read-list :effects ()
        (fn ((close-ch :as string))
          (let ((items (list)))
            (define read-list-loop :effects ()
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated list")
                  (if (= (nth source pos) close-ch)
                    (do (set! pos (inc pos)) nil)
                    (do (append! items (read-expr))
                        (read-list-loop))))))
            (read-list-loop)
            items)))

      ;; Read key/value pairs up to "}" (the "{" has already been consumed)
      ;; into a dict. Keyword keys are stored under their name; any other key
      ;; expression is stored under its `str` form.
      (define read-map :effects ()
        (fn ()
          (let ((result (dict)))
            (define read-map-loop :effects ()
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated map")
                  (if (= (nth source pos) "}")
                    (do (set! pos (inc pos)) nil)
                    (let ((key-expr (read-expr))
                          (key-str (if (= (type-of key-expr) "keyword")
                                     (keyword-name key-expr)
                                     (str key-expr)))
                          (val-expr (read-expr)))
                      (dict-set! result key-str val-expr)
                      (read-map-loop))))))
            (read-map-loop)
            result)))

      ;; Read a raw string body (the "#|" has already been consumed) up to
      ;; the closing "|". No escape processing is performed.
      (define read-raw-string :effects ()
        (fn ()
          (let ((buf ""))
            (define raw-loop :effects ()
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated raw string")
                  (let ((ch (nth source pos)))
                    (if (= ch "|")
                      (do (set! pos (inc pos)) nil)
                      (do (set! buf (str buf ch))
                          (set! pos (inc pos))
                          (raw-loop)))))))
            (raw-loop)
            buf)))

      ;; Read a character literal (the "#\" has already been consumed).
      ;; When the first character is an ident-start? character, the whole
      ;; identifier run is consumed and looked up as a character name; a name
      ;; that is not in the table yields the run's first character. Any other
      ;; first character is taken as a single-character literal.
      (define read-char-literal :effects ()
        (fn ()
          (if (>= pos len-src)
            (error "Unexpected end of input after #\\")
            (let ((first-ch (nth source pos)))
              (if (ident-start? first-ch)
                (let ((char-start pos))
                  (define read-char-name-loop :effects ()
                    (fn ()
                      (when (and (< pos len-src) (ident-char? (nth source pos)))
                        (set! pos (inc pos))
                        (read-char-name-loop))))
                  (read-char-name-loop)
                  (let ((char-name (slice source char-start pos)))
                    (make-char
                      (cond
                        (= char-name "space") 32
                        (= char-name "newline") 10
                        (= char-name "tab") 9
                        (= char-name "nul") 0
                        (= char-name "null") 0
                        (= char-name "return") 13
                        (= char-name "escape") 27
                        (= char-name "delete") 127
                        (= char-name "backspace") 8
                        (= char-name "altmode") 27
                        (= char-name "rubout") 127
                        :else (char-code first-ch)))))
                (do (set! pos (inc pos))
                    (make-char (char-code first-ch))))))))

      ;; Read exactly one expression starting at the cursor (after skipping
      ;; whitespace and comments) and return its AST value.
      (define read-expr :effects ()
        (fn ()
          (skip-ws)
          (if (>= pos len-src)
            (error "Unexpected end of input")
            (let ((ch (nth source pos)))
              (cond
                (= ch "(")
                  (do (set! pos (inc pos)) (read-list ")"))
                (= ch "[")
                  (do (set! pos (inc pos)) (read-list "]"))
                (= ch "{")
                  (do (set! pos (inc pos)) (read-map))
                (= ch "\"")
                  (read-string)
                (= ch ":")
                  (read-keyword)
                (= ch "'")
                  (do (set! pos (inc pos))
                      (list (make-symbol "quote") (read-expr)))
                (= ch "`")
                  (do (set! pos (inc pos))
                      (list (make-symbol "quasiquote") (read-expr)))
                (= ch ",")
                  (do (set! pos (inc pos))
                      (if (and (< pos len-src) (= (nth source pos) "@"))
                        (do (set! pos (inc pos))
                            (list (make-symbol "splice-unquote") (read-expr)))
                        (list (make-symbol "unquote") (read-expr))))
                (= ch "#")
                  ;; Reader-macro dispatch. "#;" never reaches this point:
                  ;; skip-ws above has already consumed every datum comment.
                  (do (set! pos (inc pos))
                      (if (>= pos len-src)
                        (error "Unexpected end of input after #")
                        (let ((dispatch-ch (nth source pos)))
                          (cond
                            (= dispatch-ch "|")
                              (do (set! pos (inc pos)) (read-raw-string))
                            (= dispatch-ch "'")
                              (do (set! pos (inc pos))
                                  (list (make-symbol "quote") (read-expr)))
                            (= dispatch-ch "\\")
                              (do (set! pos (inc pos)) (read-char-literal))
                            (ident-start? dispatch-ch)
                              (let ((macro-name (read-ident)))
                                (let ((handler (reader-macro-get macro-name)))
                                  (if handler
                                    (handler (read-expr))
                                    (error (str "Unknown reader macro: #" macro-name)))))
                            :else
                              (error (str "Unknown reader macro: #" dispatch-ch))))))
                ;; A digit, or "-" immediately followed by a digit, starts a
                ;; number; a lone "-" falls through to the symbol case.
                (or (and (>= ch "0") (<= ch "9"))
                    (and (= ch "-")
                         (< (inc pos) len-src)
                         (let ((next-ch (nth source (inc pos))))
                           (and (>= next-ch "0") (<= next-ch "9")))))
                  (read-number)
                ;; The three-character token "..." is read as a symbol.
                (and (= ch ".")
                     (< (+ pos 2) len-src)
                     (= (nth source (+ pos 1)) ".")
                     (= (nth source (+ pos 2)) "."))
                  (do (set! pos (+ pos 3)) (make-symbol "..."))
                (ident-start? ch)
                  (read-symbol)
                :else
                  (error (str "Unexpected character: " ch)))))))

      ;; Top level: read expressions until only skippable text remains.
      (let ((exprs (list)))
        (define parse-loop :effects ()
          (fn ()
            (skip-ws)
            (when (< pos len-src)
              (append! exprs (read-expr))
              (parse-loop))))
        (parse-loop)
        exprs))))


;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------

;; Serialize AST value back to SX source.
;; Dispatches on (type-of val); any type not listed falls back to (str val).
(define sx-serialize :effects ()
  (fn (val)
    (case (type-of val)
      "nil" "nil"
      "boolean" (if val "true" "false")
      "number" (str val)
      "rational" (str (numerator val) "/" (denominator val))
      "string" (str "\"" (escape-string val) "\"")
      "symbol" (symbol-name val)
      "keyword" (str ":" (keyword-name val))
      "list" (str "(" (join " " (map sx-serialize val)) ")")
      "dict" (sx-serialize-dict val)
      "sx-expr" (sx-expr-source val)
      "spread" (str "(make-spread " (sx-serialize-dict (spread-attrs val)) ")")
      ;; Characters with a name are written by name; the rest are written as
      ;; the character itself.
      "char" (let ((n (char->integer val)))
               (str "#\\"
                    (cond
                      (= n 32) "space"
                      (= n 10) "newline"
                      (= n 9) "tab"
                      (= n 13) "return"
                      (= n 0) "nul"
                      (= n 27) "escape"
                      (= n 127) "delete"
                      (= n 8) "backspace"
                      :else (char-from-code n))))
      :else (str val))))


;; Serialize a dict to SX {:key val} format.
;; Each entry is rendered as ":key value" and entries are joined with a single
;; space, in the order returned by (keys d). Built with one map pass instead
;; of repeatedly concat-ing onto an accumulator list.
(define sx-serialize-dict :effects ()
  (fn ((d :as dict))
    (str "{"
         (join " "
               (map (fn ((key :as string))
                      (str ":" key " " (sx-serialize (dict-get d key))))
                    (keys d)))
         "}")))


;; Alias: adapters use (serialize val) — canonicalize to sx-serialize
(define serialize sx-serialize)


;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification (implemented natively per target):
;;   (ident-start? ch) → boolean
;;     True for: a-z A-Z _ ~ * + - > < = / ! ? &
;;
;;   (ident-char? ch) → boolean
;;     True for: ident-start chars plus: 0-9 . : / # ,
;;
;; Constructors (provided by the SX runtime):
;;   (make-symbol name)   → Symbol value
;;   (make-keyword name)  → Keyword value
;;   (parse-number s)     → number (int or float from string)
;;   (make-char n)        → Char value from Unicode codepoint n
;;   (make-rational n d)  → Rational value (auto-reduced by GCD; d=0 is an error)
;;   (char->integer c)    → Unicode codepoint of char c
;;
;; String utilities:
;;   (escape-string s)    → string with " and \ escaped
;;   (sx-expr-source e)   → unwrap SxExpr to its source string
;;   (char-from-code n)   → single-char string from codepoint n
;;   (char-code s)        → codepoint of first char in string s
;;
;; Reader macro registry:
;;   (reader-macro-get name)           → handler fn or nil
;;   (reader-macro-set! name handler)  → register a reader macro
;; --------------------------------------------------------------------------