3 Commits

Author SHA1 Message Date
3003c8a069 HS E37 step 5: hs-tokenize-template + template routing in hs-tokens-of
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 12s
Add hs-tokenize-template: scans " as single STRING token, ${ ... }
as dollar+brace+inner-tokens (inner tokenized with hs-tokenize), and
} as brace-close. Update hs-tokens-of to call hs-tokenize-template
when :template keyword arg is passed. Unlocks tests 1 and 15.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 19:08:38 +00:00
8c62137d32 HS E37 step 2: extend read-string escapes + unterminated/hex errors
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Add \r \b \f \v and \xNN escape handling to read-string. Use
char-from-code for non-SX-literal chars. Throw "Unterminated string"
on EOF inside a string literal. Throw "Invalid hexadecimal escape: \x"
on bad \xNN. Add hs-hex-digit? and hs-hex-val helpers. Unlocks
tests 2, 6, 13, 14 once generator lands.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 19:03:03 +00:00
8ac669c739 HS E37 step 1: hs-api-tokens + stream/token helpers in runtime.sx
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Add hs-eof-sentinel, hs-op-type, hs-raw->api-token, hs-tokens-of,
hs-stream-token, hs-stream-consume, hs-stream-has-more, and the
three token accessors (hs-token-type, hs-token-value, hs-token-op?).
No test delta yet — API-only, generator comes in step 6.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 18:56:26 +00:00
2 changed files with 235 additions and 7 deletions

View File

@@ -2525,3 +2525,123 @@
(fn (fn
(fn-name args) (fn-name args)
(let ((fn (host-global fn-name))) (if fn (host-call-fn fn args) nil)))) (let ((fn (host-global fn-name))) (if fn (host-call-fn fn args) nil))))
;; ── E37 Tokenizer-as-API ─────────────────────────────────────────────
(define hs-eof-sentinel (fn () {:type "EOF" :value "<<<EOF>>>" :op false}))
(define
hs-op-type
(fn
(val)
(cond
((= val "+") "PLUS")
((= val "-") "MINUS")
((= val "*") "MULTIPLY")
((= val "/") "SLASH")
((= val "%") "PERCENT")
((= val "|") "PIPE")
((= val "!") "EXCLAMATION")
((= val "?") "QUESTION")
((= val "#") "POUND")
((= val "&") "AMPERSAND")
((= val ";") "SEMI")
((= val "=") "EQUALS")
((= val "<") "L_ANG")
((= val ">") "R_ANG")
((= val "<=") "LTE_ANG")
((= val ">=") "GTE_ANG")
((= val "==") "EQ")
((= val "===") "EQQ")
((= val "\\") "BACKSLASH")
(true (str "OP_" val)))))
(define
hs-raw->api-token
(fn
(tok)
(let
((raw-type (get tok "type"))
(raw-val (get tok "value")))
(let
((up-type
(cond
((or (= raw-type "ident") (= raw-type "keyword")) "IDENTIFIER")
((= raw-type "number") "NUMBER")
((= raw-type "string") "STRING")
((= raw-type "class") "CLASS_REF")
((= raw-type "id") "ID_REF")
((= raw-type "attr") "ATTRIBUTE_REF")
((= raw-type "style") "STYLE_REF")
((= raw-type "selector") "QUERY_REF")
((= raw-type "eof") "EOF")
((= raw-type "paren-open") "L_PAREN")
((= raw-type "paren-close") "R_PAREN")
((= raw-type "bracket-open") "L_BRACKET")
((= raw-type "bracket-close") "R_BRACKET")
((= raw-type "brace-open") "L_BRACE")
((= raw-type "brace-close") "R_BRACE")
((= raw-type "comma") "COMMA")
((= raw-type "dot") "PERIOD")
((= raw-type "colon") "COLON")
((= raw-type "op") (hs-op-type raw-val))
(true (str "UNKNOWN_" raw-type))))
(up-val
(cond
((= raw-type "class") (str "." raw-val))
((= raw-type "id") (str "#" raw-val))
((= raw-type "eof") "<<<EOF>>>")
(true raw-val)))
(is-op
(or
(= raw-type "paren-open")
(= raw-type "paren-close")
(= raw-type "bracket-open")
(= raw-type "bracket-close")
(= raw-type "brace-open")
(= raw-type "brace-close")
(= raw-type "comma")
(= raw-type "dot")
(= raw-type "colon")
(= raw-type "op"))))
{:type up-type :value up-val :op is-op}))))
(define
hs-tokens-of
(fn
(src &rest rest)
(let
((template? (and (> (len rest) 0) (= (first rest) :template)))
(raw (if template? (hs-tokenize-template src) (hs-tokenize src))))
{:source src
:list (map hs-raw->api-token raw)
:pos 0})))
(define
hs-stream-token
(fn
(s i)
(let
((lst (get s "list"))
(pos (get s "pos")))
(or (nth lst (+ pos i))
(hs-eof-sentinel)))))
(define
hs-stream-consume
(fn
(s)
(let
((tok (hs-stream-token s 0)))
(when
(not (= (get tok "type") "EOF"))
(dict-set! s "pos" (+ (get s "pos") 1)))
tok)))
(define
hs-stream-has-more
(fn (s) (not (= (get (hs-stream-token s 0) "type") "EOF"))))
(define hs-token-type (fn (tok) (get tok "type")))
(define hs-token-value (fn (tok) (get tok "value")))
(define hs-token-op? (fn (tok) (get tok "op")))

View File

@@ -28,6 +28,27 @@
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) (define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
hs-hex-digit?
(fn
(c)
(or
(and (>= c "0") (<= c "9"))
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F")))))
(define
hs-hex-val
(fn
(c)
(let
((code (char-code c)))
(cond
((and (>= code 48) (<= code 57)) (- code 48))
((and (>= code 65) (<= code 70)) (- code 55))
((and (>= code 97) (<= code 102)) (- code 87))
(true 0)))))
;; ── Keyword set ─────────────────────────────────────────────────── ;; ── Keyword set ───────────────────────────────────────────────────
(define (define
@@ -308,7 +329,7 @@
() ()
(cond (cond
(>= pos src-len) (>= pos src-len)
nil (error "Unterminated string")
(= (hs-cur) "\\") (= (hs-cur) "\\")
(do (do
(hs-advance! 1) (hs-advance! 1)
@@ -318,15 +339,37 @@
((ch (hs-cur))) ((ch (hs-cur)))
(cond (cond
(= ch "n") (= ch "n")
(append! chars "\n") (do (append! chars "\n") (hs-advance! 1))
(= ch "t") (= ch "t")
(append! chars "\t") (do (append! chars "\t") (hs-advance! 1))
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do (append! chars (char-from-code 8)) (hs-advance! 1))
(= ch "f")
(do (append! chars (char-from-code 12)) (hs-advance! 1))
(= ch "v")
(do (append! chars (char-from-code 11)) (hs-advance! 1))
(= ch "\\") (= ch "\\")
(append! chars "\\") (do (append! chars "\\") (hs-advance! 1))
(= ch quote-char) (= ch quote-char)
(append! chars quote-char) (do (append! chars quote-char) (hs-advance! 1))
:else (do (append! chars "\\") (append! chars ch))) (= ch "x")
(hs-advance! 1))) (do
(hs-advance! 1)
(if
(and
(< (+ pos 1) src-len)
(hs-hex-digit? (hs-cur))
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append! chars (char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
(loop)) (loop))
(= (hs-cur) quote-char) (= (hs-cur) quote-char)
(hs-advance! 1) (hs-advance! 1)
@@ -623,4 +666,69 @@
:else (do (hs-advance! 1) (scan!))))))) :else (do (hs-advance! 1) (scan!)))))))
(scan!) (scan!)
(hs-emit! "eof" nil pos) (hs-emit! "eof" nil pos)
tokens)))
;; ── Template-mode tokenizer (E37 API) ────────────────────────────────
;; Used by hs-tokens-of when :template flag is set.
;; Emits outer " chars as single STRING tokens; ${ ... } as $ { <inner-tokens> };
;; inner content is tokenized with the regular hs-tokenize.
(define
hs-tokenize-template
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
scan-to-close!
(fn
(depth)
(when
(and (< pos src-len) (> depth 0))
(cond
(= (t-cur) "{")
(do (t-advance! 1) (scan-to-close! (+ depth 1)))
(= (t-cur) "}")
(when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1)))
:else (do (t-advance! 1) (scan-to-close! depth))))))
(define
scan-template!
(fn
()
(when
(< pos src-len)
(let
((ch (t-cur)))
(cond
(= ch "\"")
(do (t-emit! "string" "\"") (t-advance! 1) (scan-template!))
(and (= ch "$") (= (t-peek 1) "{"))
(do
(t-emit! "op" "$")
(t-advance! 1)
(t-emit! "brace-open" "{")
(t-advance! 1)
(let
((inner-start pos))
(scan-to-close! 1)
(let
((inner-src (slice src inner-start pos))
(inner-toks (hs-tokenize inner-src)))
(for-each
(fn (tok)
(when (not (= (get tok "type") "eof"))
(append! tokens tok)))
inner-toks))
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil pos)
tokens))) tokens)))