;; _hyperscript tokenizer — produces token stream from hyperscript source ;; ;; Tokens: {:type T :value V :pos P} ;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style" ;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open" ;; "bracket-close" "brace-open" "brace-close" "comma" "colon" ;; "template" "local" "eof" ;; ── Token constructor ───────────────────────────────────────────── (define hs-make-token (fn (type value pos) {:pos pos :value value :type type})) ;; ── Character predicates ────────────────────────────────────────── (define hs-digit? (fn (c) (and (>= c "0") (<= c "9")))) (define hs-letter? (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))) (define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$")))) (define hs-ident-char? (fn (c) (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-")))) (define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) (define hs-hex-digit? (fn (c) (or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F"))))) (define hs-hex-val (fn (c) (let ((code (char-code c))) (cond ((and (>= code 48) (<= code 57)) (- code 48)) ((and (>= code 65) (<= code 70)) (- code 55)) ((and (>= code 97) (<= code 102)) (- code 87)) (true 0))))) ;; ── Keyword set ─────────────────────────────────────────────────── (define hs-keywords (list "on" "end" "set" "to" "put" "into" "before" "after" "add" "remove" "toggle" "if" "else" "otherwise" "then" "from" "in" "of" "for" "until" "wait" "send" "trigger" "call" "get" "take" "log" "hide" "show" "repeat" "while" "times" "forever" "break" "continue" "return" "throw" "catch" "finally" "def" "tell" "make" "fetch" "as" "with" "every" "or" "and" "not" "is" "no" "the" "my" "me" "it" "its" "result" "true" "false" "null" "when" "between" "at" "by" "queue" "elsewhere" "event" "target" "detail" "sender" "index" "indexed" "increment" "decrement" "append" "settle" "transition" "over" "closest" "next" "previous" "first" "last" "random" "pick" "empty" "clear" "swap" "open" "close" "exists" "matches" "contains" "do" "unless" "you" "your" "new" "init" "start" "go" "js" "less" "than" "greater" "class" "anything" "install" "measure" "behavior" "called" "render" "eval" "I" "am" "does" "some" "mod" "equal" "equals" "really" "include" "includes" "contain" "undefined" "exist" "match" "beep" "where" "sorted" "mapped" "split" "joined" "descending" "ascending" "scroll" "select" "reset" "default" "halt" "precedes" "precede" "follow" "follows" "ignoring" "case" "changes" "focus" "blur" "dom" "morph" "using" "giving" "ask" "answer")) (define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords))) ;; ── Main tokenizer ──────────────────────────────────────────────── (define hs-tokenize (fn (src) (let ((tokens (list)) (pos 0) (src-len (len src))) (define hs-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define hs-cur (fn () (hs-peek 0))) (define hs-advance! (fn (n) (set! pos (+ pos n)))) (define skip-ws! (fn () (when (and (< pos src-len) (hs-ws? (hs-cur))) (hs-advance! 1) (skip-ws!)))) (define skip-comment! (fn () (when (and (< pos src-len) (not (= (hs-cur) "\n"))) (hs-advance! 1) (skip-comment!)))) (define read-ident (fn (start) (when (and (< pos src-len) (hs-ident-char? (hs-cur))) (hs-advance! 1) (read-ident start)) (slice src start pos))) (define read-number (fn (start) (define read-int (fn () (when (and (< pos src-len) (hs-digit? (hs-cur))) (hs-advance! 1) (read-int)))) (read-int) (when (and (< pos src-len) (= (hs-cur) ".") (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (hs-advance! 1) (read-int)) (do (when (and (< pos src-len) (or (= (hs-cur) "e") (= (hs-cur) "E")) (or (and (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (and (< (+ pos 2) src-len) (or (= (hs-peek 1) "+") (= (hs-peek 1) "-")) (hs-digit? (hs-peek 2))))) (hs-advance! 1) (when (and (< pos src-len) (or (= (hs-cur) "+") (= (hs-cur) "-"))) (hs-advance! 1)) (read-int)) (let ((num-end pos)) (when (and (< pos src-len) (or (= (hs-cur) "m") (= (hs-cur) "s"))) (if (and (= (hs-cur) "m") (< (+ pos 1) src-len) (= (hs-peek 1) "s")) (hs-advance! 2) (when (= (hs-cur) "s") (hs-advance! 1)))) (slice src start pos))))) (define read-string (fn (quote-char) (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) (error "Unterminated string") (= (hs-cur) "\\") (do (hs-advance! 1) (when (< pos src-len) (let ((ch (hs-cur))) (cond (= ch "n") (do (append! chars "\n") (hs-advance! 1)) (= ch "t") (do (append! chars "\t") (hs-advance! 1)) (= ch "r") (do (append! chars "\r") (hs-advance! 1)) (= ch "b") (do (append! chars (char-from-code 8)) (hs-advance! 1)) (= ch "f") (do (append! chars (char-from-code 12)) (hs-advance! 1)) (= ch "v") (do (append! chars (char-from-code 11)) (hs-advance! 1)) (= ch "\\") (do (append! chars "\\") (hs-advance! 1)) (= ch quote-char) (do (append! chars quote-char) (hs-advance! 1)) (= ch "x") (do (hs-advance! 1) (if (and (< (+ pos 1) src-len) (hs-hex-digit? (hs-cur)) (hs-hex-digit? (hs-peek 1))) (let ((d1 (hs-hex-val (hs-cur))) (d2 (hs-hex-val (hs-peek 1)))) (append! chars (char-from-code (+ (* d1 16) d2))) (hs-advance! 2)) (error "Invalid hexadecimal escape: \\x"))) :else (do (append! chars "\\") (append! chars ch) (hs-advance! 1))))) (loop)) (= (hs-cur) quote-char) (hs-advance! 1) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-template (fn () (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) nil (= (hs-cur) "`") (hs-advance! 1) (and (= (hs-cur) "$") (< (+ pos 1) src-len) (= (hs-peek 1) "{")) (do (append! chars "${") (hs-advance! 2) (let ((depth 1)) (define inner (fn () (when (and (< pos src-len) (> depth 0)) (cond (= (hs-cur) "{") (do (set! depth (+ depth 1)) (append! chars (hs-cur)) (hs-advance! 1) (inner)) (= (hs-cur) "}") (do (set! depth (- depth 1)) (when (> depth 0) (append! chars (hs-cur))) (hs-advance! 1) (when (> depth 0) (inner))) :else (do (append! chars (hs-cur)) (hs-advance! 1) (inner)))))) (inner)) (append! chars "}") (loop)) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-selector (fn () (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) nil (and (= (hs-cur) "/") (< (+ pos 1) src-len) (= (hs-peek 1) ">")) (hs-advance! 2) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-class-name (fn (start) (when (and (< pos src-len) (or (hs-ident-char? (hs-cur)) (= (hs-cur) ":") (= (hs-cur) "[") (= (hs-cur) "]"))) (hs-advance! 1) (read-class-name start)) (slice src start pos))) (define hs-emit! (fn (type value start) (append! tokens (hs-make-token type value start)))) (define scan! (fn () (skip-ws!) (when (< pos src-len) (let ((ch (hs-cur)) (start pos)) (cond (and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-")) (do (hs-advance! 2) (skip-comment!) (scan!)) (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/")) (do (hs-advance! 2) (skip-comment!) (scan!)) (and (= ch "<") (< (+ pos 1) src-len) (not (= (hs-peek 1) "=")) (or (hs-letter? (hs-peek 1)) (= (hs-peek 1) ".") (= (hs-peek 1) "#") (= (hs-peek 1) "[") (= (hs-peek 1) "*") (= (hs-peek 1) ":"))) (do (hs-emit! "selector" (read-selector) start) (scan!)) (and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) ".")) (do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!)) (and (= ch ".") (< (+ pos 1) src-len) (or (hs-letter? (hs-peek 1)) (= (hs-peek 1) "-") (= (hs-peek 1) "_"))) (do (hs-advance! 1) (hs-emit! "class" (read-class-name pos) start) (scan!)) (and (= ch "#") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "id" (read-ident pos) start) (scan!)) (and (= ch "@") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "attr" (read-ident pos) start) (scan!)) (and (= ch "^") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "hat" (read-ident pos) start) (scan!)) (and (= ch "~") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "component" (str "~" (read-ident pos)) start) (scan!)) (and (= ch "*") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "style" (read-ident pos) start) (scan!)) (and (= ch ":") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "local" (read-ident pos) start) (scan!)) (or (= ch "\"") (and (= ch "'") (not (and (< (+ pos 1) src-len) (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2)))))))) (do (hs-emit! "string" (read-string ch) start) (scan!)) (= ch "`") (do (hs-emit! "template" (read-template) start) (scan!)) (hs-digit? ch) (do (hs-emit! "number" (read-number start) start) (scan!)) (hs-ident-start? ch) (do (let ((word (read-ident start))) (hs-emit! (if (hs-keyword? word) "keyword" "ident") word start)) (scan!)) (and (or (= ch "=") (= ch "!") (= ch "<") (= ch ">")) (< (+ pos 1) src-len) (= (hs-peek 1) "=")) (do (if (and (or (= ch "=") (= ch "!")) (< (+ pos 2) src-len) (= (hs-peek 2) "=")) (do (hs-emit! "op" (str ch "==") start) (hs-advance! 3)) (do (hs-emit! "op" (str ch "=") start) (hs-advance! 2))) (scan!)) (and (= ch "'") (< (+ pos 1) src-len) (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2))))) (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!)) (= ch "(") (do (hs-emit! "paren-open" "(" start) (hs-advance! 1) (scan!)) (= ch ")") (do (hs-emit! "paren-close" ")" start) (hs-advance! 1) (scan!)) (= ch "[") (do (hs-emit! "bracket-open" "[" start) (hs-advance! 1) (scan!)) (= ch "]") (do (hs-emit! "bracket-close" "]" start) (hs-advance! 1) (scan!)) (= ch "{") (do (hs-emit! "brace-open" "{" start) (hs-advance! 1) (scan!)) (= ch "}") (do (hs-emit! "brace-close" "}" start) (hs-advance! 1) (scan!)) (= ch ",") (do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!)) (= ch "+") (do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!)) (= ch "-") (do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!)) (= ch "/") (do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!)) (= ch "=") (do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!)) (= ch "<") (do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!)) (= ch ">") (do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!)) (= ch "!") (do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!)) (= ch "*") (do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!)) (= ch "%") (do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!)) (= ch ".") (do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!)) (= ch "\\") (do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!)) (= ch ":") (do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!)) (= ch "|") (do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!)) (= ch "&") (do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!)) (= ch "#") (do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!)) (= ch "?") (do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!)) (= ch ";") (do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!)) :else (do (hs-advance! 1) (scan!))))))) (scan!) (hs-emit! "eof" nil pos) tokens))) ;; ── Template-mode tokenizer (E37 API) ──────────────────────────────── ;; Used by hs-tokens-of when :template flag is set. ;; Emits outer " chars as single STRING tokens; ${ ... } as $ { }; ;; inner content is tokenized with the regular hs-tokenize. (define hs-tokenize-template (fn (src) (let ((tokens (list)) (pos 0) (src-len (len src))) (define t-cur (fn () (if (< pos src-len) (nth src pos) nil))) (define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil))) (define t-advance! (fn (n) (set! pos (+ pos n)))) (define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos)))) (define scan-to-close! (fn (depth) (when (and (< pos src-len) (> depth 0)) (cond (= (t-cur) "{") (do (t-advance! 1) (scan-to-close! (+ depth 1))) (= (t-cur) "}") (when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1))) :else (do (t-advance! 1) (scan-to-close! depth)))))) (define scan-template! (fn () (when (< pos src-len) (let ((ch (t-cur))) (cond (= ch "\"") (do (t-emit! "string" "\"") (t-advance! 1) (scan-template!)) (and (= ch "$") (= (t-peek 1) "{")) (do (t-emit! "op" "$") (t-advance! 1) (t-emit! "brace-open" "{") (t-advance! 1) (let ((inner-start pos)) (scan-to-close! 1) (let ((inner-src (slice src inner-start pos)) (inner-toks (hs-tokenize inner-src))) (for-each (fn (tok) (when (not (= (get tok "type") "eof")) (append! tokens tok))) inner-toks)) (t-emit! "brace-close" "}") (when (< pos src-len) (t-advance! 1))) (scan-template!)) (= ch "$") (do (t-emit! "op" "$") (t-advance! 1) (scan-template!)) (hs-ws? ch) (do (t-advance! 1) (scan-template!)) :else (do (t-advance! 1) (scan-template!))))))) (scan-template!) (t-emit! "eof" nil) tokens)))