Files
rose-ash/shared/sx/ref/parser.sx
giles 04366990ec
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 2m33s
Enforce SX boundary contract via boundary.sx spec + runtime validation
Add boundary.sx declaring all 34 I/O primitives, 32 page helpers, and 9
allowed boundary types. Runtime validation in boundary.py checks every
registration against the spec — undeclared primitives/helpers crash at
startup with SX_BOUNDARY_STRICT=1 (now set in both dev and prod).

Key changes:
- Move 5 I/O-in-disguise primitives (app-url, asset-url, config,
  jinja-global, relations-from) from primitives.py to primitives_io.py
- Remove duplicate url-for/route-prefix from primitives.py (already in IO)
- Fix parse-datetime to return ISO string instead of raw datetime
- Add datetime→isoformat conversion in _convert_result at the edge
- Wrap page helper return values with boundary type validation
- Replace all SxExpr(f"...") patterns with sx_call() or _sx_fragment()
- Add assert declaration to primitives.sx

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 23:50:02 +00:00

320 lines
11 KiB
Plaintext

;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Single-pass recursive descent: reads source text directly into AST,
;; no separate tokenization phase. All mutable cursor state lives inside
;; the parse closure.
;;
;; Grammar:
;; program → expr*
;; expr → atom | list | vector | map | quote-sugar
;; list → '(' expr* ')'
;; vector → '[' expr* ']' (sugar for list)
;; map → '{' (key expr)* '}'
;; atom → string | number | keyword | symbol | boolean | nil
;; string → '"' (char | escape)* '"'
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;; keyword → ':' ident
;; symbol → ident
;; boolean → 'true' | 'false'
;; nil → 'nil'
;; ident → ident-start ident-char*
;; comment → ';' to end of line (discarded)
;;
;; Quote sugar:
;; `expr → (quasiquote expr)
;; ,expr → (unquote expr)
;; ,@expr → (splice-unquote expr)
;;
;; Platform interface (each target implements natively):
;; (ident-start? ch) → boolean
;; (ident-char? ch) → boolean
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (parse-number s) → number (int or float from string)
;; (escape-string s) → string with " and \ escaped for serialization
;; (sx-expr-source e) → unwrap SxExpr to its source string
;; ==========================================================================
;; --------------------------------------------------------------------------
;; Parser — single-pass recursive descent
;; --------------------------------------------------------------------------
;; Returns a list of top-level AST expressions.
;; (sx-parse source) → list of AST expressions
;;
;; source: the SX program text. It is indexed one character at a time with
;;         `nth` and sub-ranged with `slice`.
;;
;; Every reader below is a closure over a single mutable cursor `pos`; each
;; one starts at the current `pos` and leaves `pos` just past whatever it
;; consumed.
;;
;; Raises (via `error`):
;;   "Unterminated string"      — input ends inside a string literal,
;;                                including directly after a backslash
;;   "Unterminated list"        — input ends before the closing ) or ]
;;   "Unterminated map"         — input ends before the closing }
;;   "Unexpected end of input"  — an expression was required but none is left
;;   "Unexpected character: c"  — c cannot start any expression
(define sx-parse
  (fn (source)
    (let ((pos 0)
          (len-src (len source)))

      ;; -- Cursor helpers (closure over pos, source, len-src) --

      ;; Advance to the next "\n" (which is left unconsumed) or to end of input.
      (define skip-comment
        (fn ()
          (when (and (< pos len-src) (not (= (nth source pos) "\n")))
            (set! pos (inc pos))
            (skip-comment))))

      ;; Advance past any run of whitespace and ";" comments.
      (define skip-ws
        (fn ()
          (when (< pos len-src)
            (let ((ch (nth source pos)))
              (cond
                ;; Whitespace
                (or (= ch " ") (= ch "\t") (= ch "\n") (= ch "\r"))
                  (do (set! pos (inc pos)) (skip-ws))
                ;; Comment — skip to end of line
                (= ch ";")
                  (do (set! pos (inc pos))
                      (skip-comment)
                      (skip-ws))
                ;; Not whitespace or comment — stop
                :else nil)))))

      ;; -- Atom readers --

      ;; Read a string literal; pos is on the opening quote on entry.
      ;; Returns the decoded contents. \n, \t and \r map to the control
      ;; characters; any other escaped character stands for itself.
      (define read-string
        (fn ()
          (set! pos (inc pos)) ;; skip opening "
          (let ((buf ""))
            (define read-str-loop
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated string")
                  (let ((ch (nth source pos)))
                    (cond
                      (= ch "\"")
                        (do (set! pos (inc pos)) nil) ;; done
                      (= ch "\\")
                        (do (set! pos (inc pos))
                            ;; A backslash may be the last character of the
                            ;; input; report that as an unterminated string
                            ;; rather than indexing past the end of source.
                            (if (>= pos len-src)
                              (error "Unterminated string")
                              (let ((esc (nth source pos)))
                                (set! buf (str buf
                                               (cond
                                                 (= esc "n") "\n"
                                                 (= esc "t") "\t"
                                                 (= esc "r") "\r"
                                                 :else esc)))
                                (set! pos (inc pos))
                                (read-str-loop))))
                      :else
                        (do (set! buf (str buf ch))
                            (set! pos (inc pos))
                            (read-str-loop)))))))
            (read-str-loop)
            buf)))

      ;; Consume the longest run of ident-char? characters starting at pos
      ;; and return it as a string (possibly empty).
      (define read-ident
        (fn ()
          (let ((start pos))
            (define read-ident-loop
              (fn ()
                (when (and (< pos len-src)
                           (ident-char? (nth source pos)))
                  (set! pos (inc pos))
                  (read-ident-loop))))
            (read-ident-loop)
            (slice source start pos))))

      ;; Read ":" followed by an identifier and build a keyword from it.
      (define read-keyword
        (fn ()
          (set! pos (inc pos)) ;; skip :
          (make-keyword (read-ident))))

      ;; Scan the text of a numeric literal (optional "-", digits, optional
      ;; ".digits", optional exponent) and hand that slice to parse-number.
      (define read-number
        (fn ()
          (let ((start pos))
            ;; Optional leading minus
            (when (and (< pos len-src) (= (nth source pos) "-"))
              (set! pos (inc pos)))
            ;; Integer digits
            (define read-digits
              (fn ()
                (when (and (< pos len-src)
                           (let ((c (nth source pos)))
                             (and (>= c "0") (<= c "9"))))
                  (set! pos (inc pos))
                  (read-digits))))
            (read-digits)
            ;; Decimal part
            (when (and (< pos len-src) (= (nth source pos) "."))
              (set! pos (inc pos))
              (read-digits))
            ;; Exponent
            (when (and (< pos len-src)
                       (or (= (nth source pos) "e")
                           (= (nth source pos) "E")))
              (set! pos (inc pos))
              (when (and (< pos len-src)
                         (or (= (nth source pos) "+")
                             (= (nth source pos) "-")))
                (set! pos (inc pos)))
              (read-digits))
            (parse-number (slice source start pos)))))

      ;; Read an identifier. The names "true", "false" and "nil" yield the
      ;; corresponding literal values; anything else becomes a symbol.
      (define read-symbol
        (fn ()
          (let ((name (read-ident)))
            (cond
              (= name "true") true
              (= name "false") false
              (= name "nil") nil
              :else (make-symbol name)))))

      ;; -- Composite readers --

      ;; Read expressions until close-ch; the opening delimiter has already
      ;; been consumed by read-expr. Used for both ( ) and [ ].
      (define read-list
        (fn (close-ch)
          (let ((items (list)))
            (define read-list-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated list")
                  (if (= (nth source pos) close-ch)
                    (do (set! pos (inc pos)) nil) ;; done
                    (do (append! items (read-expr))
                        (read-list-loop))))))
            (read-list-loop)
            items)))

      ;; Read key/value pairs until "}"; the "{" has already been consumed.
      ;; Keyword keys are stored under their keyword-name, every other key
      ;; under (str key).
      (define read-map
        (fn ()
          (let ((result (dict)))
            (define read-map-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated map")
                  (if (= (nth source pos) "}")
                    (do (set! pos (inc pos)) nil) ;; done
                    ;; key-expr gets its own let so that key-str does not
                    ;; depend on let binding its names sequentially.
                    (let ((key-expr (read-expr)))
                      (let ((key-str (if (= (type-of key-expr) "keyword")
                                       (keyword-name key-expr)
                                       (str key-expr)))
                            (val-expr (read-expr)))
                        (dict-set! result key-str val-expr)
                        (read-map-loop)))))))
            (read-map-loop)
            result)))

      ;; -- Main expression reader --

      ;; Skip leading whitespace/comments, then dispatch on the first
      ;; character of the next expression.
      (define read-expr
        (fn ()
          (skip-ws)
          (if (>= pos len-src)
            (error "Unexpected end of input")
            (let ((ch (nth source pos)))
              (cond
                ;; Lists
                (= ch "(")
                  (do (set! pos (inc pos)) (read-list ")"))
                (= ch "[")
                  (do (set! pos (inc pos)) (read-list "]"))
                ;; Map
                (= ch "{")
                  (do (set! pos (inc pos)) (read-map))
                ;; String
                (= ch "\"")
                  (read-string)
                ;; Keyword
                (= ch ":")
                  (read-keyword)
                ;; Quasiquote sugar
                (= ch "`")
                  (do (set! pos (inc pos))
                      (list (make-symbol "quasiquote") (read-expr)))
                ;; Unquote / splice-unquote
                (= ch ",")
                  (do (set! pos (inc pos))
                      (if (and (< pos len-src) (= (nth source pos) "@"))
                        (do (set! pos (inc pos))
                            (list (make-symbol "splice-unquote") (read-expr)))
                        (list (make-symbol "unquote") (read-expr))))
                ;; Number (or negative number): a "-" only starts a number
                ;; when a digit follows it immediately; otherwise it falls
                ;; through to the symbol branch.
                (or (and (>= ch "0") (<= ch "9"))
                    (and (= ch "-")
                         (< (inc pos) len-src)
                         (let ((next-ch (nth source (inc pos))))
                           (and (>= next-ch "0") (<= next-ch "9")))))
                  (read-number)
                ;; Symbol (must be ident-start char)
                (ident-start? ch)
                  (read-symbol)
                ;; Unexpected
                :else
                  (error (str "Unexpected character: " ch)))))))

      ;; -- Entry point: parse all top-level expressions --
      (let ((exprs (list)))
        (define parse-loop
          (fn ()
            (skip-ws)
            (when (< pos len-src)
              (append! exprs (read-expr))
              (parse-loop))))
        (parse-loop)
        exprs))))
;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------
;; (sx-serialize val) → SX source text for val
;;
;; Dispatches on (type-of val). Lists are serialized element by element and
;; wrapped in parentheses; dicts are delegated to sx-serialize-dict; an
;; "sx-expr" contributes its source text as given by sx-expr-source. Any
;; type not listed falls back to (str val).
(define sx-serialize
  (fn (val)
    (let ((kind (type-of val)))
      (cond
        (= kind "nil")     "nil"
        (= kind "boolean") (if val "true" "false")
        (= kind "number")  (str val)
        ;; Strings are re-quoted, with the contents passed through escape-string.
        (= kind "string")  (str "\"" (escape-string val) "\"")
        (= kind "symbol")  (symbol-name val)
        (= kind "keyword") (str ":" (keyword-name val))
        (= kind "list")    (str "(" (join " " (map sx-serialize val)) ")")
        (= kind "dict")    (sx-serialize-dict val)
        (= kind "sx-expr") (sx-expr-source val)
        :else              (str val)))))
;; (sx-serialize-dict d) → "{:k1 v1 :k2 v2 ...}"
;;
;; Each key is written as ":" followed by the key itself, and each value is
;; written with sx-serialize. Entries appear in the order (keys d) returns
;; them and are separated by single spaces; an empty dict yields "{}".
(define sx-serialize-dict
  (fn (d)
    (let ((entries (map
                     (fn (key)
                       ;; One "key value" pair, already space-separated.
                       (str ":" key " " (sx-serialize (dict-get d key))))
                     (keys d))))
      (str "{" (join " " entries) "}"))))
;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification (implemented natively per target):
;; (ident-start? ch) → boolean
;; True for: a-z A-Z _ ~ * + - > < = / ! ? &
;;
;; (ident-char? ch) → boolean
;; True for: ident-start chars plus: 0-9 . : / [ ] # ,
;;
;; Constructors (provided by the SX runtime):
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (parse-number s) → number (int or float from string)
;;
;; String utilities:
;; (escape-string s) → string with " and \ escaped
;; (sx-expr-source e) → unwrap SxExpr to its source string
;; --------------------------------------------------------------------------