Merge branch 'hs-e37-tokenizer' into loops/hs

This commit is contained in:
2026-04-26 17:54:11 +00:00
7 changed files with 3593 additions and 3010 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P :end E :line L}
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
@@ -8,7 +8,7 @@
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos end line) {:pos pos :end end :line line :value value :type type}))
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
@@ -28,6 +28,27 @@
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
hs-hex-digit?
(fn
(c)
(or
(and (>= c "0") (<= c "9"))
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F")))))
(define
hs-hex-val
(fn
(c)
(let
((code (char-code c)))
(cond
((and (>= code 48) (<= code 57)) (- code 48))
((and (>= code 65) (<= code 70)) (- code 55))
((and (>= code 97) (<= code 102)) (- code 87))
(true 0)))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
@@ -198,22 +219,14 @@
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define
hs-advance!
(fn
(n)
(when
(> n 0)
(when (= (hs-cur) "\n") (set! current-line (+ current-line 1)))
(set! pos (+ pos 1))
(hs-advance! (- n 1)))))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
@@ -243,10 +256,15 @@
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(define
read-int
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-int))))
(read-int)
(when
(and
(< pos src-len)
@@ -254,15 +272,7 @@
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(read-int))
(do
(when
(and
@@ -280,15 +290,7 @@
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(define
read-exp-digits
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-exp-digits))))
(read-exp-digits))
(read-int))
(let
((num-end pos))
(when
@@ -316,7 +318,7 @@
()
(cond
(>= pos src-len)
nil
(error "Unterminated string")
(= (hs-cur) "\\")
(do
(hs-advance! 1)
@@ -326,15 +328,37 @@
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(do (append! chars "\n") (hs-advance! 1))
(= ch "t")
(append! chars "\t")
(do (append! chars "\t") (hs-advance! 1))
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do (append! chars (char-from-code 8)) (hs-advance! 1))
(= ch "f")
(do (append! chars (char-from-code 12)) (hs-advance! 1))
(= ch "v")
(do (append! chars (char-from-code 11)) (hs-advance! 1))
(= ch "\\")
(append! chars "\\")
(do (append! chars "\\") (hs-advance! 1))
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(do (append! chars quote-char) (hs-advance! 1))
(= ch "x")
(do
(hs-advance! 1)
(if
(and
(< (+ pos 1) src-len)
(hs-hex-digit? (hs-cur))
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append! chars (char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
@@ -435,8 +459,8 @@
(define
hs-emit!
(fn
(type value start start-line)
(append! tokens (hs-make-token type value start pos start-line))))
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
@@ -445,7 +469,7 @@
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos) (start-line current-line))
((ch (hs-cur)) (start pos))
(cond
(and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
(do (hs-advance! 2) (skip-comment!) (scan!))
@@ -462,9 +486,9 @@
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start start-line) (scan!))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-advance! 2) (hs-emit! "op" ".." start start-line) (scan!))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
@@ -474,7 +498,7 @@
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start start-line)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
@@ -482,7 +506,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start start-line)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
@@ -490,7 +514,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start start-line)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "^")
@@ -498,7 +522,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "hat" (read-ident pos) start start-line)
(hs-emit! "hat" (read-ident pos) start)
(scan!))
(and
(= ch "~")
@@ -506,7 +530,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start start-line)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
@@ -514,7 +538,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start start-line)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
@@ -522,7 +546,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start start-line)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
@@ -535,11 +559,11 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start start-line) (scan!))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start start-line) (scan!))
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start start-line) (scan!))
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
@@ -547,8 +571,7 @@
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start
start-line))
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
@@ -560,8 +583,8 @@
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-advance! 3) (hs-emit! "op" (str ch "==") start start-line))
(do (hs-advance! 2) (hs-emit! "op" (str ch "=") start start-line)))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
@@ -570,66 +593,141 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-advance! 2) (hs-emit! "op" "'s" start start-line) (scan!))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(hs-emit! "paren-open" "(" start start-line)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(hs-emit! "paren-close" ")" start start-line)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(hs-emit! "bracket-open" "[" start start-line)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(hs-emit! "bracket-close" "]" start start-line)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(hs-emit! "brace-open" "{" start start-line)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(hs-emit! "brace-close" "}" start start-line)
(scan!))
(= ch ",")
(do (hs-advance! 1) (hs-emit! "comma" "," start start-line) (scan!))
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-advance! 1) (hs-emit! "op" "+" start start-line) (scan!))
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-advance! 1) (hs-emit! "op" "-" start start-line) (scan!))
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-advance! 1) (hs-emit! "op" "/" start start-line) (scan!))
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-advance! 1) (hs-emit! "op" "=" start start-line) (scan!))
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-advance! 1) (hs-emit! "op" "<" start start-line) (scan!))
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-advance! 1) (hs-emit! "op" ">" start start-line) (scan!))
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-advance! 1) (hs-emit! "op" "!" start start-line) (scan!))
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-advance! 1) (hs-emit! "op" "*" start start-line) (scan!))
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-advance! 1) (hs-emit! "op" "%" start start-line) (scan!))
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-advance! 1) (hs-emit! "dot" "." start start-line) (scan!))
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(= ch "\\")
(do (hs-advance! 1) (hs-emit! "op" "\\" start start-line) (scan!))
(do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
(= ch ":")
(do (hs-advance! 1) (hs-emit! "colon" ":" start start-line) (scan!))
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-advance! 1) (hs-emit! "op" "|" start start-line) (scan!))
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
(= ch "&")
(do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!))
(= ch "#")
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(= ch "?")
(do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!))
(= ch ";")
(do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos current-line)
(hs-emit! "eof" nil pos)
tokens)))
;; ── Template-mode tokenizer (E37 API) ────────────────────────────────
;; Used by hs-tokens-of when :template flag is set.
;; Emits outer " chars as single STRING tokens; ${ ... } as $ { <inner-tokens> };
;; inner content is tokenized with the regular hs-tokenize.
(define
hs-tokenize-template
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
scan-to-close!
(fn
(depth)
(when
(and (< pos src-len) (> depth 0))
(cond
(= (t-cur) "{")
(do (t-advance! 1) (scan-to-close! (+ depth 1)))
(= (t-cur) "}")
(when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1)))
:else (do (t-advance! 1) (scan-to-close! depth))))))
(define
scan-template!
(fn
()
(when
(< pos src-len)
(let
((ch (t-cur)))
(cond
(= ch "\"")
(do (t-emit! "string" "\"") (t-advance! 1) (scan-template!))
(and (= ch "$") (= (t-peek 1) "{"))
(do
(t-emit! "op" "$")
(t-advance! 1)
(t-emit! "brace-open" "{")
(t-advance! 1)
(let
((inner-start pos))
(scan-to-close! 1)
(let
((inner-src (slice src inner-start pos))
(inner-toks (hs-tokenize inner-src)))
(for-each
(fn (tok)
(when (not (= (get tok "type") "eof"))
(append! tokens tok)))
inner-toks))
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(= ch "$")
(do (t-emit! "op" "$") (t-advance! 1) (scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))