Files
rose-ash/lib/hyperscript/tokenizer.sx
giles a9eb821cce
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s
HS: tokenizer-stream API → 13 tests pass (-13 skips)
lib/hyperscript/tokenizer.sx — added cursor + follow-set wrapper over
the existing flat-list tokenize output:

  hs-stream src                 → {:tokens :pos :follows :last-match :last-ws}
  hs-stream-current  s          → next non-WS token (skips WS, captures :last-ws)
  hs-stream-match    s value    → consume if value matches & not in follow set
  hs-stream-match-type s ...types → consume if upstream type name matches
  hs-stream-match-any  s ...names → consume if value matches any name
  hs-stream-match-any-op s ...ops → consume if op token & value matches
  hs-stream-peek     s value n  → look n non-WS tokens ahead, no consume
  hs-stream-consume-until s marker     → collect tokens until marker
  hs-stream-consume-until-ws  s        → collect until next whitespace
  hs-stream-push-follow! / pop-follow!
  hs-stream-push-follows! / pop-follows! n
  hs-stream-clear-follows! → saved   /  restore-follows! saved
  hs-stream-last-match / last-ws

hs-stream-type-map maps our lowercase type names to upstream's
("ident" → "IDENTIFIER", "number" → "NUMBER", etc.) so type-based
matching works against upstream test expectations.

13 tokenizer-stream tests now pass; 30/30 in hs-upstream-core/tokenizer.

Skips remaining: 5 (down from 18).
  - 2 template-component scope tests
  - 1 async event dispatch (until event keyword works)
  - left for later: needs more architectural work

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 07:22:40 +00:00

1042 lines
32 KiB
Plaintext

;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
;; "template" "local" "eof"
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
(define hs-digit? (fn (c) (and (>= c "0") (<= c "9"))))
(define
hs-letter?
(fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
(define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$"))))
(define
hs-ident-char?
(fn
(c)
(or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
hs-hex-digit?
(fn
(c)
(or
(and (>= c "0") (<= c "9"))
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F")))))
(define
hs-hex-val
(fn
(c)
(let
((code (char-code c)))
(cond
((and (>= code 48) (<= code 57)) (- code 48))
((and (>= code 65) (<= code 70)) (- code 55))
((and (>= code 97) (<= code 102)) (- code 87))
(true 0)))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
hs-keywords
(list
"on"
"end"
"set"
"to"
"put"
"into"
"before"
"after"
"add"
"remove"
"toggle"
"if"
"else"
"otherwise"
"then"
"from"
"in"
"of"
"for"
"until"
"wait"
"send"
"trigger"
"call"
"get"
"take"
"log"
"hide"
"show"
"repeat"
"while"
"times"
"forever"
"break"
"continue"
"return"
"throw"
"catch"
"finally"
"def"
"tell"
"make"
"fetch"
"as"
"with"
"every"
"or"
"and"
"not"
"is"
"no"
"the"
"my"
"me"
"it"
"its"
"result"
"true"
"false"
"null"
"when"
"between"
"at"
"by"
"queue"
"elsewhere"
"event"
"target"
"detail"
"sender"
"index"
"indexed"
"increment"
"decrement"
"append"
"settle"
"transition"
"view"
"over"
"closest"
"next"
"previous"
"first"
"last"
"random"
"pick"
"empty"
"clear"
"swap"
"open"
"close"
"exists"
"matches"
"contains"
"do"
"unless"
"you"
"your"
"new"
"init"
"start"
"go"
"js"
"less"
"than"
"greater"
"class"
"anything"
"install"
"measure"
"behavior"
"called"
"render"
"eval"
"I"
"am"
"does"
"some"
"mod"
"equal"
"equals"
"really"
"include"
"includes"
"contain"
"undefined"
"exist"
"match"
"beep"
"where"
"sorted"
"mapped"
"split"
"joined"
"descending"
"ascending"
"scroll"
"select"
"reset"
"default"
"halt"
"precedes"
"precede"
"follow"
"follows"
"ignoring"
"case"
"changes"
"focus"
"blur"
"dom"
"morph"
"using"
"giving"
"ask"
"answer"
"bind"))
(define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
hs-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
()
(when
(and (< pos src-len) (hs-ws? (hs-cur)))
(hs-advance! 1)
(skip-ws!))))
(define
skip-comment!
(fn
()
(when
(and (< pos src-len) (not (= (hs-cur) "\n")))
(hs-advance! 1)
(skip-comment!))))
(define
read-ident
(fn
(start)
(when
(and (< pos src-len) (hs-ident-char? (hs-cur)))
(hs-advance! 1)
(read-ident start))
(slice src start pos)))
(define
read-number
(fn
(start)
(define
read-int
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-int))))
(read-int)
(when
(and
(< pos src-len)
(= (hs-cur) ".")
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(read-int))
(do
(when
(and
(< pos src-len)
(or (= (hs-cur) "e") (= (hs-cur) "E"))
(or
(and (< (+ pos 1) src-len) (hs-digit? (hs-peek 1)))
(and
(< (+ pos 2) src-len)
(or (= (hs-peek 1) "+") (= (hs-peek 1) "-"))
(hs-digit? (hs-peek 2)))))
(hs-advance! 1)
(when
(and
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(read-int))
(let
((num-end pos))
(when
(and
(< pos src-len)
(or (= (hs-cur) "m") (= (hs-cur) "s")))
(if
(and
(= (hs-cur) "m")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s"))
(hs-advance! 2)
(when (= (hs-cur) "s") (hs-advance! 1))))
(slice src start pos)))))
(define
read-string
(fn
(quote-char)
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
(error "Unterminated string")
(= (hs-cur) "\\")
(do
(hs-advance! 1)
(when
(< pos src-len)
(let
((ch (hs-cur)))
(cond
(= ch "n")
(do (append! chars "\n") (hs-advance! 1))
(= ch "t")
(do (append! chars "\t") (hs-advance! 1))
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do
(append! chars (char-from-code 8))
(hs-advance! 1))
(= ch "f")
(do
(append! chars (char-from-code 12))
(hs-advance! 1))
(= ch "v")
(do
(append! chars (char-from-code 11))
(hs-advance! 1))
(= ch "\\")
(do (append! chars "\\") (hs-advance! 1))
(= ch quote-char)
(do (append! chars quote-char) (hs-advance! 1))
(= ch "x")
(do
(hs-advance! 1)
(if
(and
(< (+ pos 1) src-len)
(hs-hex-digit? (hs-cur))
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append!
chars
(char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else (do
(append! chars "\\")
(append! chars ch)
(hs-advance! 1)))))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-template
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "`")
(hs-advance! 1)
(and
(= (hs-cur) "$")
(< (+ pos 1) src-len)
(= (hs-peek 1) "{"))
(do
(append! chars "${")
(hs-advance! 2)
(let
((depth 1))
(define
inner
(fn
()
(when
(and (< pos src-len) (> depth 0))
(cond
(= (hs-cur) "{")
(do
(set! depth (+ depth 1))
(append! chars (hs-cur))
(hs-advance! 1)
(inner))
(= (hs-cur) "}")
(do
(set! depth (- depth 1))
(when (> depth 0) (append! chars (hs-cur)))
(hs-advance! 1)
(when (> depth 0) (inner)))
:else (do
(append! chars (hs-cur))
(hs-advance! 1)
(inner))))))
(inner))
(append! chars "}")
(loop))
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-selector
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(and
(= (hs-cur) "/")
(< (+ pos 1) src-len)
(= (hs-peek 1) ">"))
(hs-advance! 2)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-class-name
(fn
(start)
(define
build-name
(fn
(acc depth)
(cond
((and (< pos src-len) (= (hs-cur) "\\") (< (+ pos 1) src-len))
(do
(hs-advance! 1)
(let
((c (hs-cur)))
(hs-advance! 1)
(build-name (str acc c) depth))))
((and (< pos src-len) (= (hs-cur) "["))
(do
(let
((c (hs-cur)))
(hs-advance! 1)
(build-name (str acc c) (+ depth 1)))))
((and (< pos src-len) (= (hs-cur) "]"))
(do
(let
((c (hs-cur)))
(hs-advance! 1)
(build-name
(str acc c)
(if (> depth 0) (- depth 1) 0)))))
((and (< pos src-len) (> depth 0) (or (= (hs-cur) "(") (= (hs-cur) ")")))
(do
(let
((c (hs-cur)))
(hs-advance! 1)
(build-name (str acc c) depth))))
((and (< pos src-len) (or (hs-ident-char? (hs-cur)) (= (hs-cur) ":") (= (hs-cur) "&")))
(do
(let
((c (hs-cur)))
(hs-advance! 1)
(build-name (str acc c) depth))))
(true acc))))
(build-name "" 0)))
(define
hs-emit!
(fn
(type value start)
(let
((tok (hs-make-token type value start))
(end-pos
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
(do
(dict-set! tok "end" end-pos)
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
(append! tokens tok)))))
(define
scan!
(fn
()
(let
((ws-start pos))
(skip-ws!)
(when
(and (> (len tokens) 0) (> pos ws-start))
(hs-emit! "whitespace" (slice src ws-start pos) ws-start)))
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos))
(cond
(and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and
(= ch "<")
(< (+ pos 1) src-len)
(not (= (hs-peek 1) "="))
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) ".")
(= (hs-peek 1) "#")
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")
(= (hs-peek 1) "$")))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_"))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or
(= lt "paren-close")
(= lt "brace-close")
(= lt "bracket-close"))))
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or
(= lt "paren-close")
(= lt "brace-close")
(= lt "bracket-close"))))
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "^")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "hat" (read-ident pos) start)
(scan!))
(and
(= ch "~")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
(and
(= ch "'")
(not
(and
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
((word (read-ident start)))
(let
((full-word (if (and (< pos src-len) (= (hs-cur) "'") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1)) (not (and (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2))))))) (do (hs-advance! 1) (str word "'" (read-ident pos))) word)))
(hs-emit!
(if (hs-keyword? full-word) "keyword" "ident")
full-word
start)))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
(< (+ pos 1) src-len)
(= (hs-peek 1) "="))
(do
(if
(and
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(scan!))
(= ch ",")
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(= ch "\\")
(do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
(= ch ":")
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
(= ch "&")
(do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!))
(= ch "#")
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(= ch "?")
(do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!))
(= ch ";")
(do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
tokens)))
;; ── Template-mode tokenizer (E37 API) ────────────────────────────────
;; Used by hs-tokens-of when :template flag is set.
;; Emits outer " chars as single STRING tokens; ${ ... } as $ { <inner-tokens> };
;; inner content is tokenized with the regular hs-tokenize.
(define
hs-tokenize-template
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
scan-to-close!
(fn
(depth)
(when
(and (< pos src-len) (> depth 0))
(cond
(= (t-cur) "{")
(do (t-advance! 1) (scan-to-close! (+ depth 1)))
(= (t-cur) "}")
(when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1)))
:else (do (t-advance! 1) (scan-to-close! depth))))))
(define
scan-template!
(fn
()
(when
(< pos src-len)
(let
((ch (t-cur)))
(cond
(= ch "\"")
(do (t-emit! "string" "\"") (t-advance! 1) (scan-template!))
(and (= ch "$") (= (t-peek 1) "{"))
(do
(t-emit! "op" "$")
(t-advance! 1)
(t-emit! "brace-open" "{")
(t-advance! 1)
(let
((inner-start pos))
(scan-to-close! 1)
(let
((inner-src (slice src inner-start pos))
(inner-toks (hs-tokenize inner-src)))
(for-each
(fn (tok)
(when (not (= (get tok "type") "eof"))
(append! tokens tok)))
inner-toks))
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(= ch "$")
(do (t-emit! "op" "$") (t-advance! 1) (scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))
;; ── Stream wrapper for upstream-style stateful tokenizer API ───────────────
;;
;; Upstream _hyperscript exposes a Tokens object with cursor + follow-set
;; semantics on _hyperscript.internals.tokenizer. Our hs-tokenize returns a
;; flat list; the stream wrapper adds the stateful operations.
;;
;; Type names map ours → upstream's (e.g. "ident" → "IDENTIFIER").
(define
hs-stream-type-map
(fn
(t)
(cond
((= t "ident") "IDENTIFIER")
((= t "number") "NUMBER")
((= t "string") "STRING")
((= t "class") "CLASS_REF")
((= t "id") "ID_REF")
((= t "attr") "ATTRIBUTE_REF")
((= t "style") "STYLE_REF")
((= t "whitespace") "WHITESPACE")
((= t "op") "OPERATOR")
((= t "eof") "EOF")
(true (upcase t)))))
;; Create a stream from a source string.
;; Returns a dict — mutable via dict-set!.
(define
hs-stream
(fn
(src)
{:tokens (hs-tokenize src) :pos 0 :follows (list) :last-match nil :last-ws nil}))
;; Skip whitespace tokens, advancing pos to the next non-WS token.
;; Captures the last skipped whitespace value into :last-ws.
(define
hs-stream-skip-ws!
(fn
(s)
(let
((tokens (get s :tokens)))
(define
loop
(fn
()
(let
((p (get s :pos)))
(when
(and (< p (len tokens))
(= (get (nth tokens p) :type) "whitespace"))
(do
(dict-set! s :last-ws (get (nth tokens p) :value))
(dict-set! s :pos (+ p 1))
(loop))))))
(loop))))
;; Current token (after skipping whitespace).
(define
hs-stream-current
(fn
(s)
(do
(hs-stream-skip-ws! s)
(let
((tokens (get s :tokens)) (p (get s :pos)))
(if (< p (len tokens)) (nth tokens p) nil)))))
;; Returns the current token if its value matches; advances and updates
;; :last-match. Returns nil otherwise (no advance).
;; Honors the follow set: tokens whose value is in :follows do NOT match.
(define
hs-stream-match
(fn
(s value)
(let
((cur (hs-stream-current s)))
(cond
((nil? cur) nil)
((some (fn (f) (= f value)) (get s :follows)) nil)
((= (get cur :value) value)
(do
(dict-set! s :pos (+ (get s :pos) 1))
(dict-set! s :last-match cur)
cur))
(true nil)))))
;; Match by upstream-style type name. Accepts any number of allowed types.
(define
hs-stream-match-type
(fn
(s &rest types)
(let
((cur (hs-stream-current s)))
(cond
((nil? cur) nil)
((some (fn (t) (= (hs-stream-type-map (get cur :type)) t)) types)
(do
(dict-set! s :pos (+ (get s :pos) 1))
(dict-set! s :last-match cur)
cur))
(true nil)))))
;; Match if value is one of the given names.
(define
hs-stream-match-any
(fn
(s &rest names)
(let
((cur (hs-stream-current s)))
(cond
((nil? cur) nil)
((some (fn (n) (= (get cur :value) n)) names)
(do
(dict-set! s :pos (+ (get s :pos) 1))
(dict-set! s :last-match cur)
cur))
(true nil)))))
;; Match an op token whose value is in the list.
(define
hs-stream-match-any-op
(fn
(s &rest ops)
(let
((cur (hs-stream-current s)))
(cond
((nil? cur) nil)
((and (= (get cur :type) "op")
(some (fn (o) (= (get cur :value) o)) ops))
(do
(dict-set! s :pos (+ (get s :pos) 1))
(dict-set! s :last-match cur)
cur))
(true nil)))))
;; Peek N non-WS tokens ahead. Returns the token if its value matches; nil otherwise.
(define
hs-stream-peek
(fn
(s value offset)
(let
((tokens (get s :tokens)))
(define
skip-n-non-ws
(fn
(p remaining)
(cond
((>= p (len tokens)) -1)
((= (get (nth tokens p) :type) "whitespace")
(skip-n-non-ws (+ p 1) remaining))
((= remaining 0) p)
(true (skip-n-non-ws (+ p 1) (- remaining 1))))))
(let
((p (skip-n-non-ws (get s :pos) offset)))
(if (and (>= p 0) (< p (len tokens))
(= (get (nth tokens p) :value) value))
(nth tokens p)
nil)))))
;; Consume tokens until one whose value matches the marker. Returns
;; the consumed list (excluding the marker). Marker becomes current.
(define
hs-stream-consume-until
(fn
(s marker)
(let
((tokens (get s :tokens)) (out (list)))
(define
loop
(fn
(acc)
(let
((p (get s :pos)))
(cond
((>= p (len tokens)) acc)
((= (get (nth tokens p) :value) marker) acc)
(true
(do
(dict-set! s :pos (+ p 1))
(loop (append acc (list (nth tokens p))))))))))
(loop out))))
;; Consume until the next whitespace token; returns the consumed list.
(define
hs-stream-consume-until-ws
(fn
(s)
(let
((tokens (get s :tokens)))
(define
loop
(fn
(acc)
(let
((p (get s :pos)))
(cond
((>= p (len tokens)) acc)
((= (get (nth tokens p) :type) "whitespace") acc)
(true
(do
(dict-set! s :pos (+ p 1))
(loop (append acc (list (nth tokens p))))))))))
(loop (list)))))
;; Follow-set management.
(define hs-stream-push-follow! (fn (s v) (dict-set! s :follows (cons v (get s :follows)))))
(define
hs-stream-pop-follow!
(fn (s) (let ((f (get s :follows))) (when (> (len f) 0) (dict-set! s :follows (rest f))))))
(define
hs-stream-push-follows!
(fn (s vs) (for-each (fn (v) (hs-stream-push-follow! s v)) vs)))
(define
hs-stream-pop-follows!
(fn (s n) (when (> n 0) (do (hs-stream-pop-follow! s) (hs-stream-pop-follows! s (- n 1))))))
(define
hs-stream-clear-follows!
(fn (s) (let ((saved (get s :follows))) (do (dict-set! s :follows (list)) saved))))
(define
hs-stream-restore-follows!
(fn (s saved) (dict-set! s :follows saved)))
;; Last-consumed token / whitespace.
(define hs-stream-last-match (fn (s) (get s :last-match)))
(define hs-stream-last-ws (fn (s) (get s :last-ws)))