;; _hyperscript tokenizer — produces a token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;;   :pos is the source index at which the token starts.
;; Types emitted: "keyword" "ident" "number" "string" "class" "id" "attr"
;;                "style" "component" "selector" "op" "dot" "paren-open"
;;                "paren-close" "bracket-open" "bracket-close" "brace-open"
;;                "brace-close" "comma" "template" "local" "eof"
;; NOTE: "colon" has historically been listed as a token type, but no branch
;; of the scanner emits it; a ":" that is not followed by an identifier start
;; falls through to the skip branch like any other unrecognised character.

;; ── Token constructor ─────────────────────────────────────────────

;; Build a token dict from its type, value and starting source position.
(define hs-make-token
  (fn (type value pos)
    {:pos pos :value value :type type}))

;; ── Character predicates ──────────────────────────────────────────
;; Each predicate takes a single-character string.

;; True for "0".."9".
(define hs-digit?
  (fn (c)
    (and (>= c "0") (<= c "9"))))

;; True for ASCII letters a-z and A-Z.
(define hs-letter?
  (fn (c)
    (or (and (>= c "a") (<= c "z"))
        (and (>= c "A") (<= c "Z")))))

;; True for characters that may begin an identifier: letter, "_" or "$".
(define hs-ident-start?
  (fn (c)
    (or (hs-letter? c) (= c "_") (= c "$"))))

;; True for characters that may continue an identifier:
;; letter, digit, "_", "$" or "-".
(define hs-ident-char?
  (fn (c)
    (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))

;; True for space, tab, newline and carriage return.
(define hs-ws?
  (fn (c)
    (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))

;; ── Keyword set ───────────────────────────────────────────────────

;; Words that are emitted as "keyword" tokens instead of "ident".
(define hs-keywords
  (list
    "on" "end" "set" "to" "put" "into" "before" "after" "add" "remove"
    "toggle" "if" "else" "otherwise" "then" "from" "in" "of" "for" "until"
    "wait" "send" "trigger" "call" "get" "take" "log" "hide" "show" "repeat"
    "while" "times" "forever" "break" "continue" "return" "throw" "catch"
    "finally" "def" "tell" "make" "fetch" "as" "with" "every" "or" "and"
    "not" "is" "no" "the" "my" "me" "it" "its" "result" "true" "false"
    "null" "when" "between" "at" "by" "queue" "elsewhere" "event" "target"
    "detail" "sender" "index" "increment" "decrement" "append" "settle"
    "transition" "over" "closest" "next" "previous" "first" "last" "random"
    "empty" "exists" "matches" "contains" "do" "unless" "you" "your" "new"
    "init" "start" "go" "js" "less" "than" "greater" "class" "anything"
    "install" "measure" "behavior" "called" "render" "eval"))

;; True when `word` is equal to one of the entries of hs-keywords.
(define hs-keyword?
  (fn (word)
    (some (fn (k) (= k word)) hs-keywords)))

;; ── Main tokenizer ────────────────────────────────────────────────

;; Tokenize the hyperscript source string `src`.
;; Returns the list of tokens, always terminated by an "eof" token whose
;; value is nil and whose :pos is the scan position at the end of input.
;; Characters that match no rule are skipped without emitting a token.
(define hs-tokenize
  (fn (src)
    (let ((tokens (list))
          (pos 0)
          (src-len (len src)))

      ;; Character `offset` places ahead of the cursor, or nil past the end.
      (define hs-peek
        (fn (offset)
          (if (< (+ pos offset) src-len)
            (nth src (+ pos offset))
            nil)))

      ;; Character under the cursor, or nil at end of input.
      (define hs-cur
        (fn () (hs-peek 0)))

      ;; Move the cursor forward by `n` characters.
      (define hs-advance!
        (fn (n) (set! pos (+ pos n))))

      ;; Advance past any run of whitespace.
      (define skip-ws!
        (fn ()
          (when (and (< pos src-len) (hs-ws? (hs-cur)))
            (hs-advance! 1)
            (skip-ws!))))

      ;; Advance up to (not past) the next newline or the end of input.
      (define skip-comment!
        (fn ()
          (when (and (< pos src-len) (not (= (hs-cur) "\n")))
            (hs-advance! 1)
            (skip-comment!))))

      ;; Consume identifier characters from the cursor and return the
      ;; source text from `start` up to the new cursor position.
      (define read-ident
        (fn (start)
          (when (and (< pos src-len) (hs-ident-char? (hs-cur)))
            (hs-advance! 1)
            (read-ident start))
          (slice src start pos)))

      ;; Advance past a run of decimal digits.
      (define skip-digits!
        (fn ()
          (when (and (< pos src-len) (hs-digit? (hs-cur)))
            (hs-advance! 1)
            (skip-digits!))))

      ;; Read a number starting at `start`: digits, an optional single
      ;; ".digits" fraction, and an optional "ms" / "s" unit suffix.
      ;; Returns the matched source text (suffix included).
      ;;
      ;; The digit run lives in skip-digits! so that the fraction and suffix
      ;; steps execute exactly once; looping by recursing on read-number
      ;; itself re-ran them at every level and lexed "1.2.3" or "1ss" as a
      ;; single number.
      (define read-number
        (fn (start)
          (skip-digits!)
          ;; A "." only belongs to the number when a digit follows it.
          (when (and (< pos src-len)
                     (= (hs-cur) ".")
                     (< (+ pos 1) src-len)
                     (hs-digit? (hs-peek 1)))
            (hs-advance! 1)
            (skip-digits!))
          ;; hs-cur / hs-peek yield nil past the end, so these comparisons
          ;; are simply false there.
          (let ((unit-len
                  (cond
                    (and (= (hs-cur) "m") (= (hs-peek 1) "s")) 2
                    (= (hs-cur) "s") 1
                    :else 0)))
            ;; Only treat the letters as a unit when they are not the start
            ;; of a longer word ("5seconds" is 5 followed by `seconds`).
            (when (and (> unit-len 0)
                       (or (>= (+ pos unit-len) src-len)
                           (not (hs-ident-start? (hs-peek unit-len)))))
              (hs-advance! unit-len)))
          (slice src start pos)))

      ;; Read a quoted string whose opening quote is under the cursor.
      ;; Handles the escapes \n, \t, \\ and the escaped quote character;
      ;; any other escaped character is kept together with its backslash.
      ;; An unterminated string ends at end of input.
      ;; Returns the decoded contents without the quotes.
      (define read-string
        (fn (quote-char)
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (= (hs-cur) "\\")
                  (do
                    (hs-advance! 1)
                    (when (< pos src-len)
                      (let ((ch (hs-cur)))
                        (cond
                          (= ch "n") (append! chars "\n")
                          (= ch "t") (append! chars "\t")
                          (= ch "\\") (append! chars "\\")
                          (= ch quote-char) (append! chars quote-char)
                          :else (do (append! chars "\\") (append! chars ch)))
                        (hs-advance! 1)))
                    (loop))
                  (= (hs-cur) quote-char) (hs-advance! 1)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Read a backtick template whose opening backtick is under the cursor.
      ;; "${ ... }" interpolations are copied through verbatim, tracking
      ;; nested braces so an inner "}" does not end the interpolation.
      ;; Returns the template text without the surrounding backticks.
      (define read-template
        (fn ()
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (= (hs-cur) "`") (hs-advance! 1)
                  (and (= (hs-cur) "$")
                       (< (+ pos 1) src-len)
                       (= (hs-peek 1) "{"))
                  (do
                    (append! chars "${")
                    (hs-advance! 2)
                    (let ((depth 1))
                      (define inner
                        (fn ()
                          (when (and (< pos src-len) (> depth 0))
                            (cond
                              (= (hs-cur) "{")
                              (do
                                (set! depth (+ depth 1))
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))
                              (= (hs-cur) "}")
                              (do
                                (set! depth (- depth 1))
                                ;; The brace that closes the interpolation
                                ;; is appended once, after `inner` returns.
                                (when (> depth 0) (append! chars (hs-cur)))
                                (hs-advance! 1)
                                (when (> depth 0) (inner)))
                              :else
                              (do
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))))))
                      (inner))
                    (append! chars "}")
                    (loop))
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Read a "<... />" selector literal whose "<" is under the cursor.
      ;; Returns the text between "<" and the terminating "/>"; an
      ;; unterminated selector ends at end of input.
      (define read-selector
        (fn ()
          (let ((chars (list)))
            (hs-advance! 1)
            (define loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (and (= (hs-cur) "/")
                       (< (+ pos 1) src-len)
                       (= (hs-peek 1) ">"))
                  (hs-advance! 2)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))

      ;; Consume a class name: identifier characters plus ":", "[", "]",
      ;; "(", ")" and backslash escapes. Returns the raw source text from
      ;; `start` to the cursor (backslashes are kept).
      (define read-class-name
        (fn (start)
          (when (and (< pos src-len)
                     (or (hs-ident-char? (hs-cur))
                         (= (hs-cur) ":")
                         (= (hs-cur) "\\")
                         (= (hs-cur) "[")
                         (= (hs-cur) "]")
                         (= (hs-cur) "(")
                         (= (hs-cur) ")")))
            ;; A backslash also swallows the character it escapes.
            (when (= (hs-cur) "\\") (hs-advance! 1))
            ;; Guarded so a trailing backslash cannot push the cursor
            ;; beyond the end of the source.
            (when (< pos src-len) (hs-advance! 1))
            (read-class-name start))
          (slice src start pos)))

      ;; Append a token to the output list.
      (define hs-emit!
        (fn (type value start)
          (append! tokens (hs-make-token type value start))))

      ;; Scan one token at the cursor, emit it, and continue until the end
      ;; of input. Branch order matters: "//" comments are tested before the
      ;; "/" operator, "<" selectors before the "<" operator, sigil forms
      ;; (. # @ ~ * :) before the bare punctuation, and two-character
      ;; operators before their one-character prefixes.
      (define scan!
        (fn ()
          (skip-ws!)
          (when (< pos src-len)
            (let ((ch (hs-cur))
                  (start pos))
              (cond
                ;; "//" line comment
                (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
                (do (hs-advance! 2) (skip-comment!) (scan!))

                ;; "<sel/>" selector literal (but not the "<=" operator)
                (and (= ch "<")
                     (< (+ pos 1) src-len)
                     (not (= (hs-peek 1) "="))
                     (or (hs-letter? (hs-peek 1))
                         (= (hs-peek 1) ".")
                         (= (hs-peek 1) "#")
                         (= (hs-peek 1) "[")
                         (= (hs-peek 1) "*")
                         (= (hs-peek 1) ":")))
                (do (hs-emit! "selector" (read-selector) start) (scan!))

                ;; ".name" class reference
                (and (= ch ".")
                     (< (+ pos 1) src-len)
                     (or (hs-letter? (hs-peek 1))
                         (= (hs-peek 1) "-")
                         (= (hs-peek 1) "_")))
                (do
                  (hs-advance! 1)
                  (hs-emit! "class" (read-class-name pos) start)
                  (scan!))

                ;; "#name" id reference
                (and (= ch "#")
                     (< (+ pos 1) src-len)
                     (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "id" (read-ident pos) start)
                  (scan!))

                ;; "@name" attribute reference
                (and (= ch "@")
                     (< (+ pos 1) src-len)
                     (hs-ident-char? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "attr" (read-ident pos) start)
                  (scan!))

                ;; "~name" component reference (value keeps the "~")
                (and (= ch "~")
                     (< (+ pos 1) src-len)
                     (hs-letter? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "component" (str "~" (read-ident pos)) start)
                  (scan!))

                ;; "*name" style reference
                (and (= ch "*")
                     (< (+ pos 1) src-len)
                     (hs-letter? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "style" (read-ident pos) start)
                  (scan!))

                ;; ":name" local variable
                (and (= ch ":")
                     (< (+ pos 1) src-len)
                     (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "local" (read-ident pos) start)
                  (scan!))

                ;; String literal. A "'" that begins a possessive 's (an "s"
                ;; not followed by an identifier character) is excluded here
                ;; and handled by the "'s" operator branch further down.
                (or (= ch "\"")
                    (and (= ch "'")
                         (not (and (< (+ pos 1) src-len)
                                   (= (hs-peek 1) "s")
                                   (or (>= (+ pos 2) src-len)
                                       (not (hs-ident-char? (hs-peek 2))))))))
                (do (hs-emit! "string" (read-string ch) start) (scan!))

                ;; `template`
                (= ch "`")
                (do (hs-emit! "template" (read-template) start) (scan!))

                ;; number
                (hs-digit? ch)
                (do (hs-emit! "number" (read-number start) start) (scan!))

                ;; identifier or keyword
                (hs-ident-start? ch)
                (do
                  (let ((word (read-ident start)))
                    (hs-emit! (if (hs-keyword? word) "keyword" "ident") word start))
                  (scan!))

                ;; two-character comparison operators: == != <= >=
                (and (or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
                     (< (+ pos 1) src-len)
                     (= (hs-peek 1) "="))
                (do (hs-emit! "op" (str ch "=") start) (hs-advance! 2) (scan!))

                ;; possessive 's
                (and (= ch "'")
                     (< (+ pos 1) src-len)
                     (= (hs-peek 1) "s")
                     (or (>= (+ pos 2) src-len)
                         (not (hs-ident-char? (hs-peek 2)))))
                (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))

                ;; brackets and comma
                (= ch "(")
                (do (hs-emit! "paren-open" "(" start) (hs-advance! 1) (scan!))
                (= ch ")")
                (do (hs-emit! "paren-close" ")" start) (hs-advance! 1) (scan!))
                (= ch "[")
                (do (hs-emit! "bracket-open" "[" start) (hs-advance! 1) (scan!))
                (= ch "]")
                (do (hs-emit! "bracket-close" "]" start) (hs-advance! 1) (scan!))
                (= ch "{")
                (do (hs-emit! "brace-open" "{" start) (hs-advance! 1) (scan!))
                (= ch "}")
                (do (hs-emit! "brace-close" "}" start) (hs-advance! 1) (scan!))
                (= ch ",")
                (do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))

                ;; single-character operators
                (= ch "+")
                (do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
                (= ch "-")
                (do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
                (= ch "/")
                (do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
                (= ch "=")
                (do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
                (= ch "<")
                (do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
                (= ch ">")
                (do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
                (= ch "!")
                (do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
                (= ch "*")
                (do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
                (= ch "%")
                (do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))

                ;; bare dot
                (= ch ".")
                (do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))

                ;; anything else is skipped without emitting a token
                :else (do (hs-advance! 1) (scan!)))))))

      (scan!)
      (hs-emit! "eof" nil pos)
      tokens)))