;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Single-pass recursive descent: reads source text directly into AST,
;; no separate tokenization phase. All mutable cursor state lives inside
;; the parse closure.
;;
;; Grammar:
;;   program   → expr*
;;   expr      → atom | list | vector | map | quote-sugar
;;   list      → '(' expr* ')'
;;   vector    → '[' expr* ']'           (sugar for list)
;;   map       → '{' (key expr)* '}'
;;   atom      → string | number | keyword | symbol | boolean | nil
;;   string    → '"' (char | escape)* '"'
;;   number    → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;;   keyword   → ':' ident
;;   symbol    → ident
;;   boolean   → 'true' | 'false'
;;   nil       → 'nil'
;;   ident     → ident-start ident-char*
;;   comment   → ';' to end of line (discarded)
;;
;; Quote sugar:
;;   `expr     → (quasiquote expr)
;;   ,expr     → (unquote expr)
;;   ,@expr    → (splice-unquote expr)
;;
;; Platform interface (each target implements natively):
;;   (ident-start? ch)    → boolean
;;   (ident-char? ch)     → boolean
;;   (make-symbol name)   → Symbol value
;;   (make-keyword name)  → Keyword value
;;   (escape-string s)    → string with " and \ escaped for serialization
;; ==========================================================================


;; --------------------------------------------------------------------------
;; Parser — single-pass recursive descent
;; --------------------------------------------------------------------------
;; (sx-parse source) → list of top-level AST expressions.
;;
;; source is the SX program text. It is indexed with nth, measured with len
;; and cut with slice. The cursor `pos` and every reader below live inside
;; this closure, so nothing leaks between calls.
;;
;; Errors raised via (error ...):
;;   "Unterminated string"          input ends inside a string literal,
;;                                  including directly after a backslash
;;   "Unterminated list"            input ends before the closing ) or ]
;;   "Unterminated map"             input ends before the closing }
;;   "Unexpected end of input"      an expression was required at end of input
;;   "Unexpected character: <ch>"   no reader accepts the character at pos

(define sx-parse
  (fn (source)
    (let ((pos 0)
          (len-src (len source)))

      ;; -- Cursor helpers (closure over pos, source, len-src) --

      ;; Advance pos until it rests on a newline or reaches end of input.
      ;; The newline itself is left in place for skip-ws to consume.
      (define skip-comment
        (fn ()
          (when (and (< pos len-src)
                     (not (= (nth source pos) "\n")))
            (set! pos (inc pos))
            (skip-comment))))

      ;; Advance pos past any run of whitespace and ';' comments.
      ;; Stops at the first other character or at end of input.
      (define skip-ws
        (fn ()
          (when (< pos len-src)
            (let ((ch (nth source pos)))
              (cond
                ;; Whitespace
                (or (= ch " ") (= ch "\t") (= ch "\n") (= ch "\r"))
                  (do (set! pos (inc pos))
                      (skip-ws))
                ;; Comment — skip to end of line
                (= ch ";")
                  (do (set! pos (inc pos))
                      (skip-comment)
                      (skip-ws))
                ;; Not whitespace or comment — stop
                :else nil)))))

      ;; -- Atom readers --

      ;; Read a string literal. On entry pos is on the opening quote; on
      ;; return pos is just past the closing quote. Returns the decoded text.
      ;; Escapes: \n, \t and \r map to newline, tab and carriage return; any
      ;; other escaped character stands for itself (so \" and \\ work).
      (define read-string
        (fn ()
          (set! pos (inc pos))  ;; skip opening "
          (let ((buf ""))
            (define read-str-loop
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated string")
                  (let ((ch (nth source pos)))
                    (cond
                      (= ch "\"")
                        (do (set! pos (inc pos)) nil)  ;; done
                      (= ch "\\")
                        (do (set! pos (inc pos))
                            ;; A backslash as the very last character has no
                            ;; escape target; report it instead of indexing
                            ;; past the end of source.
                            (if (>= pos len-src)
                              (error "Unterminated string")
                              (let ((esc (nth source pos)))
                                (set! buf
                                  (str buf
                                       (cond
                                         (= esc "n") "\n"
                                         (= esc "t") "\t"
                                         (= esc "r") "\r"
                                         :else esc)))
                                (set! pos (inc pos))
                                (read-str-loop))))
                      :else
                        (do (set! buf (str buf ch))
                            (set! pos (inc pos))
                            (read-str-loop)))))))
            (read-str-loop)
            buf)))

      ;; Consume characters while ident-char? holds and return them as a
      ;; string. The first character is not checked here; callers decide
      ;; whether an identifier may start at pos.
      (define read-ident
        (fn ()
          (let ((start pos))
            (define read-ident-loop
              (fn ()
                (when (and (< pos len-src)
                           (ident-char? (nth source pos)))
                  (set! pos (inc pos))
                  (read-ident-loop))))
            (read-ident-loop)
            (slice source start pos))))

      ;; Read ':' followed by an identifier and return a keyword value.
      (define read-keyword
        (fn ()
          (set! pos (inc pos))  ;; skip :
          (make-keyword (read-ident))))

      ;; Read a numeric literal starting at pos and hand its text to
      ;; parse-number.
      ;; NOTE(review): digits are not required after '.' or after the exponent
      ;; marker, so text such as "1." or "1e" reaches parse-number unchanged —
      ;; confirm parse-number's behaviour on such input.
      (define read-number
        (fn ()
          (let ((start pos))
            ;; Optional leading minus
            (when (and (< pos len-src) (= (nth source pos) "-"))
              (set! pos (inc pos)))
            ;; Integer digits
            (define read-digits
              (fn ()
                (when (and (< pos len-src)
                           (let ((c (nth source pos)))
                             (and (>= c "0") (<= c "9"))))
                  (set! pos (inc pos))
                  (read-digits))))
            (read-digits)
            ;; Decimal part
            (when (and (< pos len-src) (= (nth source pos) "."))
              (set! pos (inc pos))
              (read-digits))
            ;; Exponent
            (when (and (< pos len-src)
                       (or (= (nth source pos) "e")
                           (= (nth source pos) "E")))
              (set! pos (inc pos))
              (when (and (< pos len-src)
                         (or (= (nth source pos) "+")
                             (= (nth source pos) "-")))
                (set! pos (inc pos)))
              (read-digits))
            (parse-number (slice source start pos)))))

      ;; Read an identifier. The literal names true, false and nil become the
      ;; corresponding values; anything else becomes a symbol.
      (define read-symbol
        (fn ()
          (let ((name (read-ident)))
            (cond
              (= name "true")  true
              (= name "false") false
              (= name "nil")   nil
              :else            (make-symbol name)))))

      ;; -- Composite readers --

      ;; Read expressions until close-ch is found, consuming it. The opening
      ;; delimiter has already been consumed by the caller. Used for both
      ;; (...) and [...], so both produce an ordinary list.
      (define read-list
        (fn (close-ch)
          (let ((items (list)))
            (define read-list-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated list")
                  (if (= (nth source pos) close-ch)
                    (do (set! pos (inc pos)) nil)  ;; done
                    (do (append! items (read-expr))
                        (read-list-loop))))))
            (read-list-loop)
            items)))

      ;; Read key/value pairs until '}' is found, consuming it. The opening
      ;; '{' has already been consumed by the caller. Keys are stored as
      ;; strings: a keyword contributes its name, any other key expression
      ;; is converted with str.
      (define read-map
        (fn ()
          (let ((result (dict)))
            (define read-map-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated map")
                  (if (= (nth source pos) "}")
                    (do (set! pos (inc pos)) nil)  ;; done
                    ;; key-str refers to key-expr, so these bindings rely on
                    ;; let evaluating its bindings in order.
                    (let ((key-expr (read-expr))
                          (key-str (if (= (type-of key-expr) "keyword")
                                     (keyword-name key-expr)
                                     (str key-expr)))
                          (val-expr (read-expr)))
                      (dict-set! result key-str val-expr)
                      (read-map-loop))))))
            (read-map-loop)
            result)))

      ;; -- Main expression reader --

      ;; Skip leading whitespace/comments, then dispatch on the character at
      ;; pos to the matching reader. Returns one AST node.
      (define read-expr
        (fn ()
          (skip-ws)
          (if (>= pos len-src)
            (error "Unexpected end of input")
            (let ((ch (nth source pos)))
              (cond
                ;; Lists
                (= ch "(")
                  (do (set! pos (inc pos))
                      (read-list ")"))
                (= ch "[")
                  (do (set! pos (inc pos))
                      (read-list "]"))
                ;; Map
                (= ch "{")
                  (do (set! pos (inc pos))
                      (read-map))
                ;; String
                (= ch "\"")
                  (read-string)
                ;; Keyword
                (= ch ":")
                  (read-keyword)
                ;; Quasiquote sugar
                (= ch "`")
                  (do (set! pos (inc pos))
                      (list (make-symbol "quasiquote") (read-expr)))
                ;; Unquote / splice-unquote
                (= ch ",")
                  (do (set! pos (inc pos))
                      (if (and (< pos len-src) (= (nth source pos) "@"))
                        (do (set! pos (inc pos))
                            (list (make-symbol "splice-unquote") (read-expr)))
                        (list (make-symbol "unquote") (read-expr))))
                ;; Number (or negative number): a '-' counts as numeric only
                ;; when a digit follows immediately; otherwise it falls
                ;; through to the symbol branch.
                (or (and (>= ch "0") (<= ch "9"))
                    (and (= ch "-")
                         (< (inc pos) len-src)
                         (let ((next-ch (nth source (inc pos))))
                           (and (>= next-ch "0") (<= next-ch "9")))))
                  (read-number)
                ;; Symbol (must be ident-start char)
                (ident-start? ch)
                  (read-symbol)
                ;; Unexpected
                :else
                  (error (str "Unexpected character: " ch)))))))

      ;; -- Entry point: parse all top-level expressions --
      (let ((exprs (list)))
        (define parse-loop
          (fn ()
            (skip-ws)
            (when (< pos len-src)
              (append! exprs (read-expr))
              (parse-loop))))
        (parse-loop)
        exprs))))


;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------
;; (sx-serialize val) → string
;;
;; Dispatches on (type-of val). Lists are written with parentheses, so a
;; vector read from [...] is written back as (...). Any type not listed
;; falls back to (str val).

(define sx-serialize
  (fn (val)
    (case (type-of val)
      "nil"      "nil"
      "boolean"  (if val "true" "false")
      "number"   (str val)
      "string"   (str "\"" (escape-string val) "\"")
      "symbol"   (symbol-name val)
      "keyword"  (str ":" (keyword-name val))
      "list"     (str "(" (join " " (map sx-serialize val)) ")")
      "dict"     (sx-serialize-dict val)
      "sx-expr"  (sx-expr-source val)
      :else      (str val))))


;; (sx-serialize-dict d) → string of the form {:k1 v1 :k2 v2 ...}
;;
;; Entries follow the order of (keys d). Every key is written with a leading
;; ':' whatever form it had in the source text; values go through
;; sx-serialize.

(define sx-serialize-dict
  (fn (d)
    (str "{"
         (join " "
           (reduce
             (fn (acc key)
               (concat acc
                 (list (str ":" key)
                       (sx-serialize (dict-get d key)))))
             (list)
             (keys d)))
         "}")))


;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification (implemented natively per target):
;;   (ident-start? ch)  → boolean
;;     True for: a-z A-Z _ ~ * + - > < = / ! ? &
;;
;;   (ident-char? ch)   → boolean
;;     True for: ident-start chars plus: 0-9 . : / [ ] # ,
;;
;; Constructors (provided by the SX runtime):
;;   (make-symbol name)   → Symbol value
;;   (make-keyword name)  → Keyword value
;;   (parse-number s)     → number (int or float from string)
;;
;; String utilities:
;;   (escape-string s)    → string with " and \ escaped
;;   (sx-expr-source e)   → unwrap SxExpr to its source string
;; --------------------------------------------------------------------------