;; _hyperscript tokenizer — produces token stream from hyperscript source ;; ;; Tokens: {:type T :value V :pos P} ;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style" ;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open" ;; "bracket-close" "brace-open" "brace-close" "comma" "colon" ;; "template" "local" "eof" ;; ── Token constructor ───────────────────────────────────────────── (define hs-make-token (fn (type value pos) {:pos pos :value value :type type})) ;; ── Character predicates ────────────────────────────────────────── (define hs-digit? (fn (c) (and (>= c "0") (<= c "9")))) (define hs-letter? (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))) (define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$")))) (define hs-ident-char? (fn (c) (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-")))) (define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) ;; ── Keyword set ─────────────────────────────────────────────────── (define hs-keywords (list "on" "end" "set" "to" "put" "into" "before" "after" "add" "remove" "toggle" "if" "else" "otherwise" "then" "from" "in" "of" "for" "until" "wait" "send" "trigger" "call" "get" "take" "log" "hide" "show" "repeat" "while" "times" "forever" "break" "continue" "return" "throw" "catch" "finally" "def" "tell" "make" "fetch" "as" "with" "every" "or" "and" "not" "is" "no" "the" "my" "me" "it" "its" "result" "true" "false" "null" "when" "between" "at" "by" "queue" "elsewhere" "event" "target" "detail" "sender" "index" "indexed" "increment" "decrement" "append" "settle" "transition" "over" "closest" "next" "previous" "first" "last" "random" "pick" "empty" "clear" "swap" "open" "close" "exists" "matches" "contains" "do" "unless" "you" "your" "new" "init" "start" "go" "js" "less" "than" "greater" "class" "anything" "install" "measure" "behavior" "called" "render" "eval" "I" "am" "does" "some" "mod" "equal" "equals" "really" "include" "includes" "contain" "undefined" "exist" "match" "beep" "where" "sorted" "mapped" "split" "joined" "descending" "ascending" "scroll" "select" "reset" "default" "halt" "precedes" "precede" "follow" "follows" "ignoring" "case" "changes" "focus" "blur" "dom" "morph" "using" "giving")) (define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords))) ;; ── Main tokenizer ──────────────────────────────────────────────── (define hs-tokenize (fn (src) (let ((tokens (list)) (pos 0) (src-len (len src))) (define hs-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define hs-cur (fn () (hs-peek 0))) (define hs-advance! (fn (n) (set! pos (+ pos n)))) (define skip-ws! (fn () (when (and (< pos src-len) (hs-ws? (hs-cur))) (hs-advance! 1) (skip-ws!)))) (define skip-comment! (fn () (when (and (< pos src-len) (not (= (hs-cur) "\n"))) (hs-advance! 1) (skip-comment!)))) (define read-ident (fn (start) (when (and (< pos src-len) (hs-ident-char? (hs-cur))) (hs-advance! 1) (read-ident start)) (slice src start pos))) (define read-number (fn (start) (when (and (< pos src-len) (hs-digit? (hs-cur))) (hs-advance! 1) (read-number start)) (when (and (< pos src-len) (= (hs-cur) ".") (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (hs-advance! 1) (define read-frac (fn () (when (and (< pos src-len) (hs-digit? (hs-cur))) (hs-advance! 1) (read-frac)))) (read-frac)) (do (when (and (< pos src-len) (or (= (hs-cur) "e") (= (hs-cur) "E")) (or (and (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (and (< (+ pos 2) src-len) (or (= (hs-peek 1) "+") (= (hs-peek 1) "-")) (hs-digit? (hs-peek 2))))) (hs-advance! 1) (when (and (< pos src-len) (or (= (hs-cur) "+") (= (hs-cur) "-"))) (hs-advance! 1)) (define read-exp-digits (fn () (when (and (< pos src-len) (hs-digit? (hs-cur))) (hs-advance! 1) (read-exp-digits)))) (read-exp-digits)) (let ((num-end pos)) (when (and (< pos src-len) (or (= (hs-cur) "m") (= (hs-cur) "s"))) (if (and (= (hs-cur) "m") (< (+ pos 1) src-len) (= (hs-peek 1) "s")) (hs-advance! 2) (when (= (hs-cur) "s") (hs-advance! 1)))) (slice src start pos))))) (define read-string (fn (quote-char) (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) nil (= (hs-cur) "\\") (do (hs-advance! 1) (when (< pos src-len) (let ((ch (hs-cur))) (cond (= ch "n") (append! chars "\n") (= ch "t") (append! chars "\t") (= ch "\\") (append! chars "\\") (= ch quote-char) (append! chars quote-char) :else (do (append! chars "\\") (append! chars ch))) (hs-advance! 1))) (loop)) (= (hs-cur) quote-char) (hs-advance! 1) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-template (fn () (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) nil (= (hs-cur) "`") (hs-advance! 1) (and (= (hs-cur) "$") (< (+ pos 1) src-len) (= (hs-peek 1) "{")) (do (append! chars "${") (hs-advance! 2) (let ((depth 1)) (define inner (fn () (when (and (< pos src-len) (> depth 0)) (cond (= (hs-cur) "{") (do (set! depth (+ depth 1)) (append! chars (hs-cur)) (hs-advance! 1) (inner)) (= (hs-cur) "}") (do (set! depth (- depth 1)) (when (> depth 0) (append! chars (hs-cur))) (hs-advance! 1) (when (> depth 0) (inner))) :else (do (append! chars (hs-cur)) (hs-advance! 1) (inner)))))) (inner)) (append! chars "}") (loop)) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-selector (fn () (let ((chars (list))) (hs-advance! 1) (define loop (fn () (cond (>= pos src-len) nil (and (= (hs-cur) "/") (< (+ pos 1) src-len) (= (hs-peek 1) ">")) (hs-advance! 2) :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) (loop) (join "" chars)))) (define read-class-name (fn (start) (when (and (< pos src-len) (or (hs-ident-char? (hs-cur)) (= (hs-cur) ":") (= (hs-cur) "[") (= (hs-cur) "]"))) (hs-advance! 1) (read-class-name start)) (slice src start pos))) (define hs-emit! (fn (type value start) (append! tokens (hs-make-token type value start)))) (define scan! (fn () (skip-ws!) (when (< pos src-len) (let ((ch (hs-cur)) (start pos)) (cond (and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-")) (do (hs-advance! 2) (skip-comment!) (scan!)) (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/")) (do (hs-advance! 2) (skip-comment!) (scan!)) (and (= ch "<") (< (+ pos 1) src-len) (not (= (hs-peek 1) "=")) (or (hs-letter? (hs-peek 1)) (= (hs-peek 1) ".") (= (hs-peek 1) "#") (= (hs-peek 1) "[") (= (hs-peek 1) "*") (= (hs-peek 1) ":"))) (do (hs-emit! "selector" (read-selector) start) (scan!)) (and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) ".")) (do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!)) (and (= ch ".") (< (+ pos 1) src-len) (or (hs-letter? (hs-peek 1)) (= (hs-peek 1) "-") (= (hs-peek 1) "_"))) (do (hs-advance! 1) (hs-emit! "class" (read-class-name pos) start) (scan!)) (and (= ch "#") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "id" (read-ident pos) start) (scan!)) (and (= ch "@") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "attr" (read-ident pos) start) (scan!)) (and (= ch "^") (< (+ pos 1) src-len) (hs-ident-char? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "hat" (read-ident pos) start) (scan!)) (and (= ch "~") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "component" (str "~" (read-ident pos)) start) (scan!)) (and (= ch "*") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "style" (read-ident pos) start) (scan!)) (and (= ch ":") (< (+ pos 1) src-len) (hs-ident-start? (hs-peek 1))) (do (hs-advance! 1) (hs-emit! "local" (read-ident pos) start) (scan!)) (or (= ch "\"") (and (= ch "'") (not (and (< (+ pos 1) src-len) (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2)))))))) (do (hs-emit! "string" (read-string ch) start) (scan!)) (= ch "`") (do (hs-emit! "template" (read-template) start) (scan!)) (hs-digit? ch) (do (hs-emit! "number" (read-number start) start) (scan!)) (hs-ident-start? ch) (do (let ((word (read-ident start))) (hs-emit! (if (hs-keyword? word) "keyword" "ident") word start)) (scan!)) (and (or (= ch "=") (= ch "!") (= ch "<") (= ch ">")) (< (+ pos 1) src-len) (= (hs-peek 1) "=")) (do (if (and (or (= ch "=") (= ch "!")) (< (+ pos 2) src-len) (= (hs-peek 2) "=")) (do (hs-emit! "op" (str ch "==") start) (hs-advance! 3)) (do (hs-emit! "op" (str ch "=") start) (hs-advance! 2))) (scan!)) (and (= ch "'") (< (+ pos 1) src-len) (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2))))) (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!)) (= ch "(") (do (hs-emit! "paren-open" "(" start) (hs-advance! 1) (scan!)) (= ch ")") (do (hs-emit! "paren-close" ")" start) (hs-advance! 1) (scan!)) (= ch "[") (do (hs-emit! "bracket-open" "[" start) (hs-advance! 1) (scan!)) (= ch "]") (do (hs-emit! "bracket-close" "]" start) (hs-advance! 1) (scan!)) (= ch "{") (do (hs-emit! "brace-open" "{" start) (hs-advance! 1) (scan!)) (= ch "}") (do (hs-emit! "brace-close" "}" start) (hs-advance! 1) (scan!)) (= ch ",") (do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!)) (= ch "+") (do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!)) (= ch "-") (do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!)) (= ch "/") (do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!)) (= ch "=") (do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!)) (= ch "<") (do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!)) (= ch ">") (do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!)) (= ch "!") (do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!)) (= ch "*") (do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!)) (= ch "%") (do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!)) (= ch ".") (do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!)) (= ch "\\") (do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!)) (= ch ":") (do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!)) (= ch "|") (do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!)) :else (do (hs-advance! 1) (scan!))))))) (scan!) (hs-emit! "eof" nil pos) tokens)))