;; _hyperscript tokenizer — produces a token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P :end E :line L}
;;   :pos  — offset of the token's first character in the source
;;   :end  — offset just past the token's last character
;;   :line — 1-based line number on which the token starts
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;;        "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;;        "bracket-close" "brace-open" "brace-close" "comma" "colon"
;;        "template" "local" "hat" "component" "eof"

;; ── Token constructor ─────────────────────────────────────────────

;; Build a token dict from its five fields.
(define hs-make-token
  (fn (type value pos end line)
    {:pos pos :end end :line line :value value :type type}))

;; ── Character predicates ──────────────────────────────────────────
;; Each predicate takes a one-character string.

;; True for "0".."9".
(define hs-digit?
  (fn (c)
    (and (>= c "0") (<= c "9"))))

;; True for ASCII letters a-z / A-Z.
(define hs-letter?
  (fn (c)
    (or (and (>= c "a") (<= c "z"))
        (and (>= c "A") (<= c "Z")))))

;; True for characters that may begin an identifier: letter, "_" or "$".
(define hs-ident-start?
  (fn (c)
    (or (hs-letter? c) (= c "_") (= c "$"))))

;; True for characters that may continue an identifier.
;; Note that "-" is included, so dashed names lex as a single identifier.
(define hs-ident-char?
  (fn (c)
    (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))

;; True for space, tab, newline and carriage return.
(define hs-ws?
  (fn (c)
    (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))

;; ── Keyword set ───────────────────────────────────────────────────

;; Words that are emitted with type "keyword" instead of "ident".
(define hs-keywords
  (list
    "on" "end" "set" "to" "put" "into" "before" "after" "add" "remove"
    "toggle" "if" "else" "otherwise" "then" "from" "in" "of" "for" "until"
    "wait" "send" "trigger" "call" "get" "take" "log" "hide" "show" "repeat"
    "while" "times" "forever" "break" "continue" "return" "throw" "catch"
    "finally" "def" "tell" "make" "fetch" "as" "with" "every" "or" "and"
    "not" "is" "no" "the" "my" "me" "it" "its" "result" "true" "false"
    "null" "when" "between" "at" "by" "queue" "elsewhere" "event" "target"
    "detail" "sender" "index" "indexed" "increment" "decrement" "append"
    "settle" "transition" "over" "closest" "next" "previous" "first" "last"
    "random" "pick" "empty" "clear" "swap" "open" "close" "exists" "matches"
    "contains" "do" "unless" "you" "your" "new" "init" "start" "go" "js"
    "less" "than" "greater" "class" "anything" "install" "measure"
    "behavior" "called" "render" "eval" "I" "am" "does" "some" "mod"
    "equal" "equals" "really" "include" "includes" "contain" "undefined"
    "exist" "match" "beep" "where" "sorted" "mapped" "split" "joined"
    "descending" "ascending" "scroll" "select" "reset" "default" "halt"
    "precedes" "precede" "follow" "follows" "ignoring" "case" "changes"
    "focus" "blur" "dom" "morph" "using" "giving" "ask" "answer"))

;; True when `word` is exactly one of hs-keywords (case-sensitive).
(define hs-keyword?
  (fn (word)
    (some (fn (k) (= k word)) hs-keywords)))

;; ── Main tokenizer ────────────────────────────────────────────────

;; Tokenize the hyperscript source string `src`.
;; Returns a list of token dicts (see hs-make-token); the final token always
;; has type "eof" and a nil value. Characters that match no rule are skipped
;; silently. Unterminated strings, templates and selectors run to end of input.
(define hs-tokenize
  (fn (src)
    (let ((tokens (list))
          (pos 0)
          (src-len (len src))
          (current-line 1))

      ;; Character at pos+offset, or nil when that is past the end of input.
      (define hs-peek
        (fn (offset)
          (if (< (+ pos offset) src-len)
            (nth src (+ pos offset))
            nil)))

      ;; Character at the current position (nil at end of input).
      (define hs-cur
        (fn ()
          (hs-peek 0)))

      ;; Move forward n characters, bumping current-line for every newline
      ;; that is stepped over.
      (define hs-advance!
        (fn (n)
          (when (> n 0)
            (when (= (hs-cur) "\n")
              (set! current-line (+ current-line 1)))
            (set! pos (+ pos 1))
            (hs-advance! (- n 1)))))

      ;; Skip a run of whitespace.
      (define skip-ws!
        (fn ()
          (when (and (< pos src-len) (hs-ws? (hs-cur)))
            (hs-advance! 1)
            (skip-ws!))))

      ;; Skip to (but not past) the next newline or end of input.
      (define skip-comment!
        (fn ()
          (when (and (< pos src-len) (not (= (hs-cur) "\n")))
            (hs-advance! 1)
            (skip-comment!))))

      ;; Consume identifier characters; return the source text start..pos.
      (define read-ident
        (fn (start)
          (when (and (< pos src-len) (hs-ident-char? (hs-cur)))
            (hs-advance! 1)
            (read-ident start))
          (slice src start pos)))

      ;; Consume a (possibly empty) run of decimal digits.
      (define read-digits!
        (fn ()
          (when (and (< pos src-len) (hs-digit? (hs-cur)))
            (hs-advance! 1)
            (read-digits!))))

      ;; True when the character at pos+offset cannot continue an identifier
      ;; (or is past the end of input).
      (define at-word-end?
        (fn (offset)
          (or (>= (+ pos offset) src-len)
              (not (hs-ident-char? (hs-peek offset))))))

      ;; Read a numeric literal beginning at `start` and return its text:
      ;;   digits [ "." digits ] [ ("e"|"E") ["+"|"-"] digits ] [ "ms" | "s" ]
      ;; Every stage runs exactly once. The digit run is consumed by
      ;; read-digits! rather than by re-entering read-number, so "1.5.3"
      ;; lexes as "1.5" "." "3" instead of a single number.
      (define read-number
        (fn (start)
          ;; integer part
          (read-digits!)
          ;; fraction — only when the "." is followed by a digit, so that
          ;; "1..10" and "1.foo" leave the dot for the scanner
          (when (and (< pos src-len)
                     (= (hs-cur) ".")
                     (< (+ pos 1) src-len)
                     (hs-digit? (hs-peek 1)))
            (hs-advance! 1)
            (read-digits!))
          ;; exponent — "e"/"E" followed by a digit or by a sign and a digit
          (when (and (< pos src-len)
                     (or (= (hs-cur) "e") (= (hs-cur) "E"))
                     (or (and (< (+ pos 1) src-len)
                              (hs-digit? (hs-peek 1)))
                         (and (< (+ pos 2) src-len)
                              (or (= (hs-peek 1) "+") (= (hs-peek 1) "-"))
                              (hs-digit? (hs-peek 2)))))
            (hs-advance! 1)
            (when (and (< pos src-len)
                       (or (= (hs-cur) "+") (= (hs-cur) "-")))
              (hs-advance! 1))
            (read-digits!))
          ;; duration suffix — taken only when it is a whole word, so that
          ;; "2seconds" yields "2" + "seconds" rather than "2s" + "econds"
          (cond
            (and (< (+ pos 1) src-len)
                 (= (hs-cur) "m")
                 (= (hs-peek 1) "s")
                 (at-word-end? 2))
            (hs-advance! 2)
            (and (< pos src-len)
                 (= (hs-cur) "s")
                 (at-word-end? 1))
            (hs-advance! 1)
            :else nil)
          (slice src start pos)))

      ;; Read a quoted string whose opening quote is at pos; `quote-char` is
      ;; that quote character. Returns the contents without the quotes.
      ;; Escapes: \n, \t, \\ and \<quote> are decoded; any other \x is kept
      ;; verbatim as backslash + x.
      (define read-string
        (fn (quote-char)
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (= (hs-cur) "\\")
                  (do
                    (hs-advance! 1)
                    (when (< pos src-len)
                      (let ((ch (hs-cur)))
                        (cond
                          (= ch "n") (append! chars "\n")
                          (= ch "t") (append! chars "\t")
                          (= ch "\\") (append! chars "\\")
                          (= ch quote-char) (append! chars quote-char)
                          :else (do (append! chars "\\") (append! chars ch)))
                        (hs-advance! 1)))
                    (loop))
                  ;; closing quote: consume it and stop
                  (= (hs-cur) quote-char) (hs-advance! 1)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Read a backtick template whose opening backtick is at pos. Returns
      ;; the raw contents without the backticks. "${ ... }" interpolations are
      ;; copied through verbatim, tracking brace depth so a nested "}" or a
      ;; backtick inside the braces does not end the template early.
      (define read-template
        (fn ()
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  ;; closing backtick: consume it and stop
                  (= (hs-cur) "`") (hs-advance! 1)
                  (and (= (hs-cur) "$")
                       (< (+ pos 1) src-len)
                       (= (hs-peek 1) "{"))
                  (do
                    (append! chars "${")
                    (hs-advance! 2)
                    (let ((depth 1))
                      (define inner
                        (fn ()
                          (when (and (< pos src-len) (> depth 0))
                            (cond
                              (= (hs-cur) "{")
                              (do
                                (set! depth (+ depth 1))
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))
                              (= (hs-cur) "}")
                              (do
                                (set! depth (- depth 1))
                                ;; the brace that closes the interpolation is
                                ;; appended once, after inner returns
                                (when (> depth 0) (append! chars (hs-cur)))
                                (hs-advance! 1)
                                (when (> depth 0) (inner)))
                              :else
                              (do
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))))))
                      (inner))
                    (append! chars "}")
                    (loop))
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Read a "<... />" selector literal whose "<" is at pos. Returns the
      ;; text between "<" and the terminating "/>".
      (define read-selector
        (fn ()
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (and (= (hs-cur) "/")
                       (< (+ pos 1) src-len)
                       (= (hs-peek 1) ">"))
                  (hs-advance! 2)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Consume a class name (identifier characters plus ":", "[" and "]")
      ;; and return the source text start..pos.
      (define read-class-name
        (fn (start)
          (when (and (< pos src-len)
                     (or (hs-ident-char? (hs-cur))
                         (= (hs-cur) ":")
                         (= (hs-cur) "[")
                         (= (hs-cur) "]")))
            (hs-advance! 1)
            (read-class-name start))
          (slice src start pos)))

      ;; Append a token that began at `start` / `start-line` and ends at the
      ;; current position.
      (define hs-emit!
        (fn (type value start start-line)
          (append! tokens (hs-make-token type value start pos start-line))))

      ;; Main loop: skip whitespace, emit one token (or skip a comment or an
      ;; unknown character), then recurse. Branch order matters: multi-character
      ;; and sigil-prefixed forms are tried before the single-character fallbacks.
      (define scan!
        (fn ()
          (skip-ws!)
          (when (< pos src-len)
            (let ((ch (hs-cur))
                  (start pos)
                  (start-line current-line))
              (cond
                ;; "--" line comment
                (and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
                (do (hs-advance! 2) (skip-comment!) (scan!))

                ;; "//" line comment
                (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
                (do (hs-advance! 2) (skip-comment!) (scan!))

                ;; "<sel/>" selector literal: "<" directly followed by a
                ;; letter or one of . # [ * :
                (and (= ch "<")
                     (< (+ pos 1) src-len)
                     (not (= (hs-peek 1) "="))
                     (or (hs-letter? (hs-peek 1))
                         (= (hs-peek 1) ".")
                         (= (hs-peek 1) "#")
                         (= (hs-peek 1) "[")
                         (= (hs-peek 1) "*")
                         (= (hs-peek 1) ":")))
                (do (hs-emit! "selector" (read-selector) start start-line) (scan!))

                ;; ".." operator
                (and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
                (do (hs-advance! 2) (hs-emit! "op" ".." start start-line) (scan!))

                ;; ".name" class reference (value excludes the dot)
                (and (= ch ".")
                     (< (+ pos 1) src-len)
                     (or (hs-letter? (hs-peek 1))
                         (= (hs-peek 1) "-")
                         (= (hs-peek 1) "_")))
                (do
                  (hs-advance! 1)
                  (hs-emit! "class" (read-class-name pos) start start-line)
                  (scan!))

                ;; "#name" id reference (value excludes the "#")
                (and (= ch "#") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "id" (read-ident pos) start start-line)
                  (scan!))

                ;; "@name" attribute reference (value excludes the "@")
                (and (= ch "@") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "attr" (read-ident pos) start start-line)
                  (scan!))

                ;; "^name" (value excludes the "^")
                (and (= ch "^") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "hat" (read-ident pos) start start-line)
                  (scan!))

                ;; "~name" component reference (value keeps the "~")
                (and (= ch "~") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "component" (str "~" (read-ident pos)) start start-line)
                  (scan!))

                ;; "*name" style reference (value excludes the "*")
                (and (= ch "*") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "style" (read-ident pos) start start-line)
                  (scan!))

                ;; ":name" local variable (value excludes the ":")
                (and (= ch ":") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "local" (read-ident pos) start start-line)
                  (scan!))

                ;; string literal — a "'" that forms a standalone possessive
                ;; "'s" is excluded here and handled further down
                (or (= ch "\"")
                    (and (= ch "'")
                         (not (and (< (+ pos 1) src-len)
                                   (= (hs-peek 1) "s")
                                   (or (>= (+ pos 2) src-len)
                                       (not (hs-ident-char? (hs-peek 2))))))))
                (do (hs-emit! "string" (read-string ch) start start-line) (scan!))

                ;; backtick template
                (= ch "`")
                (do (hs-emit! "template" (read-template) start start-line) (scan!))

                ;; number
                (hs-digit? ch)
                (do (hs-emit! "number" (read-number start) start start-line) (scan!))

                ;; identifier or keyword
                (hs-ident-start? ch)
                (do
                  (let ((word (read-ident start)))
                    (hs-emit! (if (hs-keyword? word) "keyword" "ident")
                              word start start-line))
                  (scan!))

                ;; two- and three-character comparisons: == != <= >= === !==
                (and (or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
                     (< (+ pos 1) src-len)
                     (= (hs-peek 1) "="))
                (do
                  (if (and (or (= ch "=") (= ch "!"))
                           (< (+ pos 2) src-len)
                           (= (hs-peek 2) "="))
                    (do (hs-advance! 3) (hs-emit! "op" (str ch "==") start start-line))
                    (do (hs-advance! 2) (hs-emit! "op" (str ch "=") start start-line)))
                  (scan!))

                ;; possessive "'s" not followed by an identifier character
                (and (= ch "'")
                     (< (+ pos 1) src-len)
                     (= (hs-peek 1) "s")
                     (or (>= (+ pos 2) src-len)
                         (not (hs-ident-char? (hs-peek 2)))))
                (do (hs-advance! 2) (hs-emit! "op" "'s" start start-line) (scan!))

                ;; single-character punctuation and operators
                (= ch "(")
                (do (hs-advance! 1) (hs-emit! "paren-open" "(" start start-line) (scan!))
                (= ch ")")
                (do (hs-advance! 1) (hs-emit! "paren-close" ")" start start-line) (scan!))
                (= ch "[")
                (do (hs-advance! 1) (hs-emit! "bracket-open" "[" start start-line) (scan!))
                (= ch "]")
                (do (hs-advance! 1) (hs-emit! "bracket-close" "]" start start-line) (scan!))
                (= ch "{")
                (do (hs-advance! 1) (hs-emit! "brace-open" "{" start start-line) (scan!))
                (= ch "}")
                (do (hs-advance! 1) (hs-emit! "brace-close" "}" start start-line) (scan!))
                (= ch ",")
                (do (hs-advance! 1) (hs-emit! "comma" "," start start-line) (scan!))
                (= ch "+")
                (do (hs-advance! 1) (hs-emit! "op" "+" start start-line) (scan!))
                (= ch "-")
                (do (hs-advance! 1) (hs-emit! "op" "-" start start-line) (scan!))
                (= ch "/")
                (do (hs-advance! 1) (hs-emit! "op" "/" start start-line) (scan!))
                (= ch "=")
                (do (hs-advance! 1) (hs-emit! "op" "=" start start-line) (scan!))
                (= ch "<")
                (do (hs-advance! 1) (hs-emit! "op" "<" start start-line) (scan!))
                (= ch ">")
                (do (hs-advance! 1) (hs-emit! "op" ">" start start-line) (scan!))
                (= ch "!")
                (do (hs-advance! 1) (hs-emit! "op" "!" start start-line) (scan!))
                (= ch "*")
                (do (hs-advance! 1) (hs-emit! "op" "*" start start-line) (scan!))
                (= ch "%")
                (do (hs-advance! 1) (hs-emit! "op" "%" start start-line) (scan!))
                (= ch ".")
                (do (hs-advance! 1) (hs-emit! "dot" "." start start-line) (scan!))
                (= ch "\\")
                (do (hs-advance! 1) (hs-emit! "op" "\\" start start-line) (scan!))
                (= ch ":")
                (do (hs-advance! 1) (hs-emit! "colon" ":" start start-line) (scan!))
                (= ch "|")
                (do (hs-advance! 1) (hs-emit! "op" "|" start start-line) (scan!))

                ;; anything else is skipped without emitting a token
                :else (do (hs-advance! 1) (scan!)))))))

      (scan!)
      (hs-emit! "eof" nil pos current-line)
      tokens)))