Files
rose-ash/spec/parser.sx
giles 4b600f17e8 spec: character type (char? char->integer #\a literals + predicates)
- Add SxChar tagged object {_char, codepoint} to JS platform
- char? char->integer integer->char char-upcase char-downcase
- char=? char<? char>? char<=? char>=? comparators
- char-ci=? char-ci<? char-ci>? char-ci<=? char-ci>=? case-insensitive
- char-alphabetic? char-numeric? char-whitespace? char-upper-case? char-lower-case?
- string->list (returns chars) and list->string (accepts chars)
- #\a #\space #\newline reader syntax in spec/parser.sx
- integer->char alias in spec/evaluator.sx
- js-char-renames dict in transpiler.sx for ->-containing names
- 43 tests in spec/tests/test-chars.sx, all passing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-01 11:50:04 +00:00

586 lines
19 KiB
Plaintext

;; ==========================================================================
;; parser.sx — Reference SX parser specification
;;
;; Defines how SX source text is tokenized and parsed into AST.
;; The parser is intentionally simple — s-expressions need minimal parsing.
;;
;; Single-pass recursive descent: reads source text directly into AST,
;; no separate tokenization phase. All mutable cursor state lives inside
;; the parse closure.
;;
;; Grammar:
;; program → expr*
;; expr → atom | list | vector | map | quote-sugar | reader-macro
;; list → '(' expr* ')'
;; vector → '[' expr* ']' (sugar for list)
;; map → '{' (key expr)* '}'
;; atom → string | number | keyword | symbol | boolean | nil | char
;; string → '"' (char | escape)* '"'
;; number → '-'? digit+ ('.' digit+)? ([eE] [+-]? digit+)?
;; keyword → ':' ident
;; symbol → ident
;; boolean → 'true' | 'false'
;; nil → 'nil'
;; char → '#\' (ident | single-char)
;; ident → ident-start ident-char*
;; comment → ';' to end of line (discarded)
;;
;; Quote sugar:
;; 'expr → (quote expr)
;; `expr → (quasiquote expr)
;; ,expr → (unquote expr)
;; ,@expr → (splice-unquote expr)
;;
;; Reader macros:
;; #;expr → datum comment (read and discard expr)
;; #|raw chars| → raw string literal (no escape processing)
;; #'expr → (quote expr)
;; #\a → character literal (char value)
;; #\space → named character (space = 32)
;; #name expr → extensible dispatch (calls registered handler)
;;
;; Platform interface (each target implements natively):
;; (ident-start? ch) → boolean
;; (ident-char? ch) → boolean
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (escape-string s) → string with " and \ escaped for serialization
;; (make-char n) → Char value from Unicode codepoint
;; (char->integer c) → Unicode codepoint of char c
;; (char-from-code n) → single-char string from codepoint
;; (char-code s) → codepoint of first char in string s
;; ==========================================================================
;; --------------------------------------------------------------------------
;; Parser — single-pass recursive descent
;; --------------------------------------------------------------------------
;; Parse SX source text into a list of top-level AST expressions.
;;
;; Parameters:
;;   source — the SX source text (string)
;; Returns:
;;   a list holding one AST value per top-level expression, in source order.
;; Errors (raised via `error`):
;;   unterminated string / list / map / raw string, truncated or non-hex
;;   \uXXXX escape, unknown character name, unknown reader macro, unexpected
;;   character, unexpected end of input.
;;
;; All cursor state (`pos`) lives in this closure; every helper below reads
;; and advances it with set!.
(define
  sx-parse
  :effects ()
  (fn
    ((source :as string))
    (let
      ((pos 0) (len-src (len source)))
      ;; Advance to the end of the current line; the newline itself is left
      ;; for skip-ws to consume.
      (define
        skip-comment
        :effects ()
        (fn
          ()
          (when
            (and (< pos len-src) (not (= (nth source pos) "\n")))
            (set! pos (inc pos))
            (skip-comment))))
      ;; Skip whitespace, `;` line comments and `#;` datum comments.
      ;; Datum comments are handled here (not in read-expr) so that they are
      ;; also legal right before a closing delimiter or at end of input,
      ;; e.g. (a #;b) or a trailing #;x.
      (define
        skip-ws
        :effects ()
        (fn
          ()
          (when
            (< pos len-src)
            (let
              ((ch (nth source pos)))
              (cond
                (or (= ch " ") (= ch "\t") (= ch "\n") (= ch "\r"))
                (do (set! pos (inc pos)) (skip-ws))
                (= ch ";")
                (do (set! pos (inc pos)) (skip-comment) (skip-ws))
                (and
                  (= ch "#")
                  (< (inc pos) len-src)
                  (= (nth source (inc pos)) ";"))
                ;; Read the commented-out datum and throw it away.
                (do (set! pos (+ pos 2)) (read-expr) (skip-ws))
                :else nil)))))
      ;; Position of ch in the hex alphabet (case-insensitive); the result of
      ;; index-of for a non-hex character is validated by read-hex-digit.
      (define
        hex-digit-value
        :effects ()
        (fn (ch) (index-of "0123456789abcdef" (lower ch))))
      ;; Consume one hex digit of a \uXXXX escape and return its value.
      ;; Errors on end of input or on a character that is not a hex digit.
      (define
        read-hex-digit
        :effects ()
        (fn
          ()
          (if
            (>= pos len-src)
            (error "Unterminated string")
            (let
              ((digit (hex-digit-value (nth source pos))))
              (if
                (or (nil? digit) (< digit 0))
                (error
                  (str
                    "Invalid \\u escape in string: expected hex digit, got "
                    (nth source pos)))
                (do (set! pos (inc pos)) digit))))))
      ;; Read a double-quoted string; the cursor is on the opening quote.
      ;; Supports \n \t \r \uXXXX; any other escaped character stands for
      ;; itself.
      (define
        read-string
        :effects ()
        (fn
          ()
          (set! pos (inc pos))
          (let
            ((buf ""))
            (define
              read-str-loop
              :effects ()
              (fn
                ()
                (if
                  (>= pos len-src)
                  (error "Unterminated string")
                  (let
                    ((ch (nth source pos)))
                    (cond
                      (= ch "\"")
                      (do (set! pos (inc pos)) nil)
                      (= ch "\\")
                      (do
                        (set! pos (inc pos))
                        (if
                          ;; A backslash as the very last character.
                          (>= pos len-src)
                          (error "Unterminated string")
                          (let
                            ((esc (nth source pos)))
                            (if
                              (= esc "u")
                              (do
                                (set! pos (inc pos))
                                ;; Bindings are evaluated in order; each call
                                ;; consumes exactly one digit.
                                (let
                                  ((d0 (read-hex-digit))
                                   (d1 (read-hex-digit))
                                   (d2 (read-hex-digit))
                                   (d3 (read-hex-digit)))
                                  (set!
                                    buf
                                    (str
                                      buf
                                      (char-from-code
                                        (+ (* d0 4096) (* d1 256) (* d2 16) d3))))
                                  (read-str-loop)))
                              (do
                                (set!
                                  buf
                                  (str
                                    buf
                                    (cond
                                      (= esc "n")
                                      "\n"
                                      (= esc "t")
                                      "\t"
                                      (= esc "r")
                                      "\r"
                                      :else esc)))
                                (set! pos (inc pos))
                                (read-str-loop))))))
                      :else (do
                        (set! buf (str buf ch))
                        (set! pos (inc pos))
                        (read-str-loop)))))))
            (read-str-loop)
            buf)))
      ;; Consume a maximal run of ident-char? characters and return it.
      (define
        read-ident
        :effects ()
        (fn
          ()
          (let
            ((start pos))
            (define
              read-ident-loop
              :effects ()
              (fn
                ()
                (when
                  (and (< pos len-src) (ident-char? (nth source pos)))
                  (set! pos (inc pos))
                  (read-ident-loop))))
            (read-ident-loop)
            (slice source start pos))))
      ;; Cursor is on ':'; skip it and build a keyword from the identifier.
      (define
        read-keyword
        :effects ()
        (fn () (set! pos (inc pos)) (make-keyword (read-ident))))
      ;; Read optional '-', digits, optional fraction and optional exponent,
      ;; then hand the matched text to parse-number.
      (define
        read-number
        :effects ()
        (fn
          ()
          (let
            ((start pos))
            (when
              (and (< pos len-src) (= (nth source pos) "-"))
              (set! pos (inc pos)))
            (define
              read-digits
              :effects ()
              (fn
                ()
                (when
                  (and
                    (< pos len-src)
                    (let
                      ((c (nth source pos)))
                      (and (>= c "0") (<= c "9"))))
                  (set! pos (inc pos))
                  (read-digits))))
            (read-digits)
            (when
              (and (< pos len-src) (= (nth source pos) "."))
              (set! pos (inc pos))
              (read-digits))
            (when
              (and
                (< pos len-src)
                (or (= (nth source pos) "e") (= (nth source pos) "E")))
              (set! pos (inc pos))
              (when
                (and
                  (< pos len-src)
                  (or (= (nth source pos) "+") (= (nth source pos) "-")))
                (set! pos (inc pos)))
              (read-digits))
            (parse-number (slice source start pos)))))
      ;; Read an identifier; true / false / nil become literals, everything
      ;; else becomes a symbol.
      (define
        read-symbol
        :effects ()
        (fn
          ()
          (let
            ((name (read-ident)))
            (cond
              (= name "true")
              true
              (= name "false")
              false
              (= name "nil")
              nil
              :else (make-symbol name)))))
      ;; Read expressions until close-ch; the opening delimiter has already
      ;; been consumed by the caller.
      (define
        read-list
        :effects ()
        (fn
          ((close-ch :as string))
          (let
            ((items (list)))
            (define
              read-list-loop
              :effects ()
              (fn
                ()
                (skip-ws)
                (if
                  (>= pos len-src)
                  (error "Unterminated list")
                  (if
                    (= (nth source pos) close-ch)
                    (do (set! pos (inc pos)) nil)
                    (do (append! items (read-expr)) (read-list-loop))))))
            (read-list-loop)
            items)))
      ;; Read key/value pairs until '}'. Keyword keys are stored by name,
      ;; any other key by its `str` form.
      (define
        read-map
        :effects ()
        (fn
          ()
          (let
            ((result (dict)))
            (define
              read-map-loop
              :effects ()
              (fn
                ()
                (skip-ws)
                (if
                  (>= pos len-src)
                  (error "Unterminated map")
                  (if
                    (= (nth source pos) "}")
                    (do (set! pos (inc pos)) nil)
                    ;; Bindings are evaluated in order: key first, then value.
                    (let
                      ((key-expr (read-expr))
                       (key-str
                         (if
                           (= (type-of key-expr) "keyword")
                           (keyword-name key-expr)
                           (str key-expr)))
                       (val-expr (read-expr)))
                      (dict-set! result key-str val-expr)
                      (read-map-loop))))))
            (read-map-loop)
            result)))
      ;; Read a #|...| raw string: everything up to the next '|', verbatim.
      (define
        read-raw-string
        :effects ()
        (fn
          ()
          (let
            ((buf ""))
            (define
              raw-loop
              :effects ()
              (fn
                ()
                (if
                  (>= pos len-src)
                  (error "Unterminated raw string")
                  (let
                    ((ch (nth source pos)))
                    (if
                      (= ch "|")
                      (do (set! pos (inc pos)) nil)
                      (do
                        (set! buf (str buf ch))
                        (set! pos (inc pos))
                        (raw-loop)))))))
            (raw-loop)
            buf)))
      ;; Read the part of a character literal that follows "#\".
      ;; An identifier-like token must be a known name (space, newline, ...)
      ;; or exactly one character; anything else is an error rather than
      ;; being silently truncated to its first character.
      (define
        read-char-literal
        :effects ()
        (fn
          ()
          (if
            (>= pos len-src)
            (error "Unexpected end of input after #\\")
            (let
              ((first-ch (nth source pos)))
              (if
                (ident-start? first-ch)
                (let
                  ((char-name (read-ident)))
                  (make-char
                    (cond
                      (= char-name "space")
                      32
                      (= char-name "newline")
                      10
                      (= char-name "tab")
                      9
                      (= char-name "nul")
                      0
                      (= char-name "null")
                      0
                      (= char-name "return")
                      13
                      (= char-name "escape")
                      27
                      (= char-name "delete")
                      127
                      (= char-name "backspace")
                      8
                      (= char-name "altmode")
                      27
                      (= char-name "rubout")
                      127
                      ;; Plain one-character literal such as #\a.
                      (= char-name first-ch)
                      (char-code first-ch)
                      :else (error (str "Unknown character name: #\\" char-name)))))
                ;; Non-identifier character (digit, paren, ...): take it as is.
                (do (set! pos (inc pos)) (make-char (char-code first-ch))))))))
      ;; Read one expression starting at the next significant character.
      ;; `#;` datum comments never reach the `#` dispatch below: skip-ws has
      ;; already consumed them.
      (define
        read-expr
        :effects ()
        (fn
          ()
          (skip-ws)
          (if
            (>= pos len-src)
            (error "Unexpected end of input")
            (let
              ((ch (nth source pos)))
              (cond
                (= ch "(")
                (do (set! pos (inc pos)) (read-list ")"))
                (= ch "[")
                (do (set! pos (inc pos)) (read-list "]"))
                (= ch "{")
                (do (set! pos (inc pos)) (read-map))
                (= ch "\"")
                (read-string)
                (= ch ":")
                (read-keyword)
                (= ch "'")
                (do
                  (set! pos (inc pos))
                  (list (make-symbol "quote") (read-expr)))
                (= ch "`")
                (do
                  (set! pos (inc pos))
                  (list (make-symbol "quasiquote") (read-expr)))
                (= ch ",")
                (do
                  (set! pos (inc pos))
                  (if
                    (and (< pos len-src) (= (nth source pos) "@"))
                    (do
                      (set! pos (inc pos))
                      (list (make-symbol "splice-unquote") (read-expr)))
                    (list (make-symbol "unquote") (read-expr))))
                (= ch "#")
                (do
                  (set! pos (inc pos))
                  (if
                    (>= pos len-src)
                    (error "Unexpected end of input after #")
                    (let
                      ((dispatch-ch (nth source pos)))
                      (cond
                        (= dispatch-ch "|")
                        (do (set! pos (inc pos)) (read-raw-string))
                        (= dispatch-ch "'")
                        (do
                          (set! pos (inc pos))
                          (list (make-symbol "quote") (read-expr)))
                        (= dispatch-ch "\\")
                        (do (set! pos (inc pos)) (read-char-literal))
                        (ident-start? dispatch-ch)
                        (let
                          ((macro-name (read-ident)))
                          (let
                            ((handler (reader-macro-get macro-name)))
                            (if
                              handler
                              (handler (read-expr))
                              (error
                                (str "Unknown reader macro: #" macro-name)))))
                        :else (error (str "Unknown reader macro: #" dispatch-ch))))))
                ;; A digit, or '-' immediately followed by a digit.
                (or
                  (and (>= ch "0") (<= ch "9"))
                  (and
                    (= ch "-")
                    (< (inc pos) len-src)
                    (let
                      ((next-ch (nth source (inc pos))))
                      (and (>= next-ch "0") (<= next-ch "9")))))
                (read-number)
                ;; The literal "..." is read as a symbol of that name.
                (and
                  (= ch ".")
                  (< (+ pos 2) len-src)
                  (= (nth source (+ pos 1)) ".")
                  (= (nth source (+ pos 2)) "."))
                (do (set! pos (+ pos 3)) (make-symbol "..."))
                (ident-start? ch)
                (read-symbol)
                :else (error (str "Unexpected character: " ch)))))))
      (let
        ((exprs (list)))
        (define
          parse-loop
          :effects ()
          (fn
            ()
            (skip-ws)
            (when (< pos len-src) (append! exprs (read-expr)) (parse-loop))))
        (parse-loop)
        exprs))))
;; --------------------------------------------------------------------------
;; Serializer — AST → SX source text
;; --------------------------------------------------------------------------
;; Serialize AST value back to SX source
;; Render an AST value as SX source text.
;;
;; Dispatches on (type-of val). Lists and dicts recurse; chars are written
;; as #\name for the named control/space characters and as #\<char>
;; otherwise. Any type not listed falls back to (str val).
(define
  sx-serialize
  :effects ()
  (fn
    (val)
    (let
      ((tag (type-of val)))
      (cond
        (= tag "nil")
        "nil"
        (= tag "boolean")
        (if val "true" "false")
        (= tag "number")
        (str val)
        (= tag "string")
        (str "\"" (escape-string val) "\"")
        (= tag "symbol")
        (symbol-name val)
        (= tag "keyword")
        (str ":" (keyword-name val))
        (= tag "list")
        (str "(" (join " " (map sx-serialize val)) ")")
        (= tag "dict")
        (sx-serialize-dict val)
        (= tag "sx-expr")
        (sx-expr-source val)
        (= tag "spread")
        (str "(make-spread " (sx-serialize-dict (spread-attrs val)) ")")
        (= tag "char")
        (let
          ((code (char->integer val)))
          (let
            ;; Text after the "#\" prefix: a name for the special code
            ;; points, the character itself for everything else.
            ((label
               (cond
                 (= code 32)
                 "space"
                 (= code 10)
                 "newline"
                 (= code 9)
                 "tab"
                 (= code 13)
                 "return"
                 (= code 0)
                 "nul"
                 (= code 27)
                 "escape"
                 (= code 127)
                 "delete"
                 (= code 8)
                 "backspace"
                 :else (char-from-code code))))
            (str "#\\" label)))
        :else (str val)))))
;; Render a dict as SX map syntax: {:key val :key2 val2}.
;; Each key is written with a leading ':' and each value goes through
;; sx-serialize; entries appear in the order returned by (keys d).
(define
  sx-serialize-dict
  :effects ()
  (fn
    ((d :as dict))
    (let
      ((entries
         (map
           (fn
             ((key :as string))
             (str ":" key " " (sx-serialize (dict-get d key))))
           (keys d))))
      (str "{" (join " " entries) "}"))))
;; Alias: adapters call (serialize val); bind that name to sx-serialize so
;; both spellings resolve to the same function.
(define serialize sx-serialize)
;; --------------------------------------------------------------------------
;; Platform parser interface
;; --------------------------------------------------------------------------
;;
;; Character classification (implemented natively per target):
;; (ident-start? ch) → boolean
;; True for: a-z A-Z _ ~ * + - > < = / ! ? &
;;
;; (ident-char? ch) → boolean
;; True for: ident-start chars plus: 0-9 . : / # ,
;;
;; Constructors (provided by the SX runtime):
;; (make-symbol name) → Symbol value
;; (make-keyword name) → Keyword value
;; (parse-number s) → number (int or float from string)
;; (make-char n) → Char value from Unicode codepoint n
;; (char->integer c) → Unicode codepoint of char c
;;
;; String utilities:
;; (escape-string s) → string with " and \ escaped
;; (sx-expr-source e) → unwrap SxExpr to its source string
;; (char-from-code n) → single-char string from codepoint n
;; (char-code s) → codepoint of first char in string s
;;
;; Reader macro registry:
;; (reader-macro-get name) → handler fn or nil
;; (reader-macro-set! name handler) → register a reader macro
;; --------------------------------------------------------------------------