- Both bootstrappers (JS + Python) now gate shift/reset behind the `--extensions continuations` flag. Without it, using reset/shift errors at runtime.
- JS bootstrapper: extracted Continuation/ShiftSignal types, sfReset/sfShift, the `continuation?` primitive, and typeOf handling into the CONTINUATIONS_JS constant. The extension wraps evalList, aserSpecial, and typeOf post-transpilation.
- Python bootstrapper: added a special-forms.sx validation cross-check against the eval.sx dispatch; warns on mismatches.
- Added shared/sx/ref/special-forms.sx: 36 declarative form specs with syntax, docs, tail-position, and examples. Used by the bootstrappers for validation.
- Added ellipsis (`...`) support to both parser.py and the parser.sx spec.
- Updated the continuations essay to reflect the optional extension architecture.
- Updated the specs page and nav with a special-forms.sx entry.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
328 lines
11 KiB
Plaintext
328 lines
11 KiB
Plaintext
;; ==========================================================================
|
|
;; parser.sx — Reference SX parser specification
|
|
;;
|
|
;; Defines how SX source text is tokenized and parsed into AST.
|
|
;; The parser is intentionally simple — s-expressions need minimal parsing.
|
|
;;
|
|
;; Single-pass recursive descent: reads source text directly into AST,
|
|
;; no separate tokenization phase. All mutable cursor state lives inside
|
|
;; the parse closure.
|
|
;;
|
|
;; Grammar:
|
|
;; program → expr*
|
|
;; expr → atom | list | vector | map | quote-sugar
|
|
;; list → '(' expr* ')'
|
|
;; vector → '[' expr* ']' (sugar for list)
|
|
;; map → '{' (key expr)* '}'
|
|
;; atom → string | number | keyword | symbol | boolean | nil
|
|
;; string → '"' (char | escape)* '"'
|
|
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
|
|
;; keyword → ':' ident
|
|
;; symbol → ident | '...'
|
|
;; boolean → 'true' | 'false'
|
|
;; nil → 'nil'
|
|
;; ident → ident-start ident-char*
|
|
;; comment → ';' to end of line (discarded)
|
|
;;
|
|
;; Quote sugar:
|
|
;; `expr → (quasiquote expr)
|
|
;; ,expr → (unquote expr)
|
|
;; ,@expr → (splice-unquote expr)
|
|
;;
|
|
;; Platform interface (each target implements natively):
|
|
;; (ident-start? ch) → boolean
|
|
;; (ident-char? ch) → boolean
|
|
;; (make-symbol name) → Symbol value
|
|
;; (make-keyword name) → Keyword value
|
|
;; (escape-string s) → string with " and \ escaped for serialization
|
|
;; ==========================================================================
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Parser — single-pass recursive descent
|
|
;; --------------------------------------------------------------------------
|
|
;; Returns a list of top-level AST expressions.
|
|
|
|
;; sx-parse: parse SX source text into AST in a single pass.
;;
;;   source — the SX program text.
;;   Returns the list of top-level expressions, in source order.
;;   Signals (via `error`) on an unterminated string, list or map, on
;;   premature end of input, and on a character that cannot start an
;;   expression.
;;
;; The cursor `pos` is mutable state shared by every nested reader below.
(define sx-parse
  (fn (source)
    (let ((pos 0)
          (len-src (len source)))

      ;; -- Cursor helpers (closure over pos, source, len-src) --

      ;; Advance the cursor to the end of the current line. The newline
      ;; itself is not consumed here; skip-ws picks it up as whitespace.
      (define skip-comment
        (fn ()
          (when (and (< pos len-src) (not (= (nth source pos) "\n")))
            (set! pos (inc pos))
            (skip-comment))))

      ;; Skip spaces, tabs, newlines, carriage returns and ; comments.
      (define skip-ws
        (fn ()
          (when (< pos len-src)
            (let ((ch (nth source pos)))
              (cond
                ;; Whitespace
                (or (= ch " ") (= ch "\t") (= ch "\n") (= ch "\r"))
                  (do (set! pos (inc pos)) (skip-ws))
                ;; Comment — skip to end of line
                (= ch ";")
                  (do (set! pos (inc pos))
                      (skip-comment)
                      (skip-ws))
                ;; Not whitespace or comment — stop
                :else nil)))))

      ;; -- Atom readers --

      ;; Read a string literal. Expects the cursor on the opening quote and
      ;; leaves it just past the closing quote. Returns the decoded text.
      ;; Escapes: \n, \t and \r map to their control characters; any other
      ;; escaped character (including " and \) is taken literally.
      (define read-string
        (fn ()
          (set! pos (inc pos)) ;; skip opening "
          (let ((buf ""))
            (define read-str-loop
              (fn ()
                (if (>= pos len-src)
                  (error "Unterminated string")
                  (let ((ch (nth source pos)))
                    (cond
                      (= ch "\"")
                        (do (set! pos (inc pos)) nil) ;; done
                      (= ch "\\")
                        (do (set! pos (inc pos))
                            ;; A backslash as the very last character of the
                            ;; input has nothing to escape: report it instead
                            ;; of indexing past the end of the source.
                            (if (>= pos len-src)
                              (error "Unterminated string")
                              (let ((esc (nth source pos)))
                                (set! buf (str buf
                                  (cond
                                    (= esc "n") "\n"
                                    (= esc "t") "\t"
                                    (= esc "r") "\r"
                                    :else esc)))
                                (set! pos (inc pos))
                                (read-str-loop))))
                      :else
                        (do (set! buf (str buf ch))
                            (set! pos (inc pos))
                            (read-str-loop)))))))
            (read-str-loop)
            buf)))

      ;; Consume characters while ident-char? holds and return the consumed
      ;; text. The caller decides whether the first character may start an
      ;; identifier; this helper does not check ident-start?.
      (define read-ident
        (fn ()
          (let ((start pos))
            (define read-ident-loop
              (fn ()
                (when (and (< pos len-src)
                           (ident-char? (nth source pos)))
                  (set! pos (inc pos))
                  (read-ident-loop))))
            (read-ident-loop)
            (slice source start pos))))

      ;; Read a keyword. Expects the cursor on the leading colon.
      (define read-keyword
        (fn ()
          (set! pos (inc pos)) ;; skip :
          (make-keyword (read-ident))))

      ;; Read a numeric literal: optional minus, digits, optional fraction,
      ;; optional exponent. The matched text is handed to parse-number.
      (define read-number
        (fn ()
          (let ((start pos))
            ;; Optional leading minus
            (when (and (< pos len-src) (= (nth source pos) "-"))
              (set! pos (inc pos)))
            ;; Integer digits
            (define read-digits
              (fn ()
                (when (and (< pos len-src)
                           (let ((c (nth source pos)))
                             (and (>= c "0") (<= c "9"))))
                  (set! pos (inc pos))
                  (read-digits))))
            (read-digits)
            ;; Decimal part
            (when (and (< pos len-src) (= (nth source pos) "."))
              (set! pos (inc pos))
              (read-digits))
            ;; Exponent
            (when (and (< pos len-src)
                       (or (= (nth source pos) "e")
                           (= (nth source pos) "E")))
              (set! pos (inc pos))
              (when (and (< pos len-src)
                         (or (= (nth source pos) "+")
                             (= (nth source pos) "-")))
                (set! pos (inc pos)))
              (read-digits))
            (parse-number (slice source start pos)))))

      ;; Read an identifier and map the reserved names true / false / nil to
      ;; their literal values; anything else becomes a symbol.
      (define read-symbol
        (fn ()
          (let ((name (read-ident)))
            (cond
              (= name "true") true
              (= name "false") false
              (= name "nil") nil
              :else (make-symbol name)))))

      ;; -- Composite readers --

      ;; Read expressions until close-ch and return them as a list. The
      ;; caller has already consumed the opening bracket.
      (define read-list
        (fn (close-ch)
          (let ((items (list)))
            (define read-list-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated list")
                  (if (= (nth source pos) close-ch)
                    (do (set! pos (inc pos)) nil) ;; done
                    (do (append! items (read-expr))
                        (read-list-loop))))))
            (read-list-loop)
            items)))

      ;; Read key/value pairs until } and return them as a dict. The caller
      ;; has already consumed the opening brace. Keyword keys are stored
      ;; under their name; any other key expression is stored under its
      ;; `str` form.
      (define read-map
        (fn ()
          (let ((result (dict)))
            (define read-map-loop
              (fn ()
                (skip-ws)
                (if (>= pos len-src)
                  (error "Unterminated map")
                  (if (= (nth source pos) "}")
                    (do (set! pos (inc pos)) nil) ;; done
                    (let ((key-expr (read-expr))
                          (key-str (if (= (type-of key-expr) "keyword")
                                     (keyword-name key-expr)
                                     (str key-expr)))
                          (val-expr (read-expr)))
                      (dict-set! result key-str val-expr)
                      (read-map-loop))))))
            (read-map-loop)
            result)))

      ;; -- Main expression reader --

      ;; Read exactly one expression, dispatching on its first character.
      (define read-expr
        (fn ()
          (skip-ws)
          (if (>= pos len-src)
            (error "Unexpected end of input")
            (let ((ch (nth source pos)))
              (cond
                ;; Lists
                (= ch "(")
                  (do (set! pos (inc pos)) (read-list ")"))
                (= ch "[")
                  (do (set! pos (inc pos)) (read-list "]"))

                ;; Map
                (= ch "{")
                  (do (set! pos (inc pos)) (read-map))

                ;; String
                (= ch "\"")
                  (read-string)

                ;; Keyword
                (= ch ":")
                  (read-keyword)

                ;; Quasiquote sugar
                (= ch "`")
                  (do (set! pos (inc pos))
                      (list (make-symbol "quasiquote") (read-expr)))

                ;; Unquote / splice-unquote
                (= ch ",")
                  (do (set! pos (inc pos))
                      (if (and (< pos len-src) (= (nth source pos) "@"))
                        (do (set! pos (inc pos))
                            (list (make-symbol "splice-unquote") (read-expr)))
                        (list (make-symbol "unquote") (read-expr))))

                ;; Number (or negative number). A lone "-" not followed by a
                ;; digit falls through to the symbol branch below.
                (or (and (>= ch "0") (<= ch "9"))
                    (and (= ch "-")
                         (< (inc pos) len-src)
                         (let ((next-ch (nth source (inc pos))))
                           (and (>= next-ch "0") (<= next-ch "9")))))
                  (read-number)

                ;; Ellipsis (... as a symbol)
                (and (= ch ".")
                     (< (+ pos 2) len-src)
                     (= (nth source (+ pos 1)) ".")
                     (= (nth source (+ pos 2)) "."))
                  (do (set! pos (+ pos 3))
                      (make-symbol "..."))

                ;; Symbol (must be ident-start char)
                (ident-start? ch)
                  (read-symbol)

                ;; Unexpected
                :else
                  (error (str "Unexpected character: " ch)))))))

      ;; -- Entry point: parse all top-level expressions --
      (let ((exprs (list)))
        (define parse-loop
          (fn ()
            (skip-ws)
            (when (< pos len-src)
              (append! exprs (read-expr))
              (parse-loop))))
        (parse-loop)
        exprs))))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Serializer — AST → SX source text
|
|
;; --------------------------------------------------------------------------
|
|
|
|
;; sx-serialize: render an AST value as SX source text.
;;
;;   val — any value the runtime can classify with type-of.
;;   Returns a string. Values whose type is not listed below fall back to
;;   their `str` form.
(define sx-serialize
  (fn (val)
    (let ((kind (type-of val)))
      (cond
        (= kind "nil")     "nil"
        (= kind "boolean") (if val "true" "false")
        (= kind "number")  (str val)
        ;; Strings are re-quoted after escape-string has processed them.
        (= kind "string")  (str "\"" (escape-string val) "\"")
        (= kind "symbol")  (symbol-name val)
        (= kind "keyword") (str ":" (keyword-name val))
        ;; Lists serialize each element recursively, separated by spaces.
        (= kind "list")    (str "(" (join " " (map sx-serialize val)) ")")
        (= kind "dict")    (sx-serialize-dict val)
        ;; An sx-expr serializes as the source string it wraps.
        (= kind "sx-expr") (sx-expr-source val)
        :else              (str val)))))
|
|
|
|
|
|
;; sx-serialize-dict: render a dict as SX map syntax, "{:k1 v1 :k2 v2}".
;;
;;   d — the dict to render.
;;   Returns a string. Each key is written with a leading colon and each
;;   value is rendered through sx-serialize, in the order `keys` yields.
(define sx-serialize-dict
  (fn (d)
    ;; Build the flat sequence key1 val1 key2 val2 ... first, then wrap it.
    (let ((entries (reduce
                     (fn (parts k)
                       (concat parts
                               (list (str ":" k)
                                     (sx-serialize (dict-get d k)))))
                     (list)
                     (keys d))))
      (str "{" (join " " entries) "}"))))
|
|
|
|
|
|
;; --------------------------------------------------------------------------
|
|
;; Platform parser interface
|
|
;; --------------------------------------------------------------------------
|
|
;;
|
|
;; Character classification (implemented natively per target):
|
|
;; (ident-start? ch) → boolean
|
|
;; True for: a-z A-Z _ ~ * + - > < = / ! ? &
|
|
;;
|
|
;; (ident-char? ch) → boolean
|
|
;; True for: ident-start chars plus: 0-9 . : / [ ] # ,
|
|
;;
|
|
;; Constructors (provided by the SX runtime):
|
|
;; (make-symbol name) → Symbol value
|
|
;; (make-keyword name) → Keyword value
|
|
;; (parse-number s) → number (int or float from string)
|
|
;;
|
|
;; String utilities:
|
|
;; (escape-string s) → string with " and \ escaped
|
|
;; (sx-expr-source e) → unwrap SxExpr to its source string
|
|
;; --------------------------------------------------------------------------
|