diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 90491aef..c44fc4fd 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1308,6 +1308,8 @@ let run_spec_tests env test_files = load_module "freeze.sx" lib_dir; load_module "content.sx" lib_dir; load_module "parser-combinators.sx" lib_dir; + let hs_dir = Filename.concat lib_dir "hyperscript" in + load_module "tokenizer.sx" hs_dir; load_module "types.sx" lib_dir; load_module "sx-swap.sx" lib_dir; (* Shared templates: TW styling engine *) diff --git a/lib/hyperscript/tokenizer.sx b/lib/hyperscript/tokenizer.sx new file mode 100644 index 00000000..6a622668 --- /dev/null +++ b/lib/hyperscript/tokenizer.sx @@ -0,0 +1,515 @@ +;; _hyperscript tokenizer — produces token stream from hyperscript source +;; +;; Tokens: {:type T :value V :pos P} +;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style" +;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open" +;; "bracket-close" "brace-open" "brace-close" "comma" "colon" +;; "template" "local" "eof" + +;; ── Token constructor ───────────────────────────────────────────── + +(define hs-make-token (fn (type value pos) {:pos pos :value value :type type})) + +;; ── Character predicates ────────────────────────────────────────── + +(define hs-digit? (fn (c) (and (>= c "0") (<= c "9")))) + +(define + hs-letter? + (fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))) + +(define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$")))) + +(define + hs-ident-char? + (fn + (c) + (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-")))) + +(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +;; ── Keyword set ─────────────────────────────────────────────────── + +(define + hs-keywords + (list + "on" + "end" + "set" + "to" + "put" + "into" + "before" + "after" + "add" + "remove" + "toggle" + "if" + "else" + "otherwise" + "then" + "from" + "in" + "of" + "for" + "until" + "wait" + "send" + "trigger" + "call" + "get" + "take" + "log" + "hide" + "show" + "repeat" + "while" + "times" + "forever" + "break" + "continue" + "return" + "throw" + "catch" + "finally" + "def" + "tell" + "make" + "fetch" + "as" + "with" + "every" + "or" + "and" + "not" + "is" + "no" + "the" + "my" + "me" + "it" + "its" + "result" + "true" + "false" + "null" + "when" + "between" + "at" + "by" + "queue" + "elsewhere" + "event" + "target" + "detail" + "sender" + "index" + "increment" + "decrement" + "append" + "settle" + "transition" + "over" + "closest" + "next" + "previous" + "first" + "last" + "random" + "empty" + "exists" + "matches" + "contains" + "do" + "unless" + "you" + "your" + "new" + "init" + "start" + "go" + "js" + "less" + "than" + "greater" + "class" + "anything")) + +(define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords))) + +;; ── Main tokenizer ──────────────────────────────────────────────── + +(define + hs-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + hs-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define hs-cur (fn () (hs-peek 0))) + (define hs-advance! (fn (n) (set! pos (+ pos n)))) + (define + skip-ws! + (fn + () + (when + (and (< pos src-len) (hs-ws? (hs-cur))) + (hs-advance! 1) + (skip-ws!)))) + (define + skip-comment! + (fn + () + (when + (and (< pos src-len) (not (= (hs-cur) "\n"))) + (hs-advance! 1) + (skip-comment!)))) + (define + read-ident + (fn + (start) + (when + (and (< pos src-len) (hs-ident-char? (hs-cur))) + (hs-advance! 1) + (read-ident start)) + (slice src start pos))) + (define + read-number + (fn + (start) + (when + (and (< pos src-len) (hs-digit? (hs-cur))) + (hs-advance! 1) + (read-number start)) + (when + (and + (< pos src-len) + (= (hs-cur) ".") + (< (+ pos 1) src-len) + (hs-digit? (hs-peek 1))) + (hs-advance! 1) + (define + read-frac + (fn + () + (when + (and (< pos src-len) (hs-digit? (hs-cur))) + (hs-advance! 1) + (read-frac)))) + (read-frac)) + (let + ((num-end pos)) + (when + (and + (< pos src-len) + (or (= (hs-cur) "m") (= (hs-cur) "s"))) + (if + (and + (= (hs-cur) "m") + (< (+ pos 1) src-len) + (= (hs-peek 1) "s")) + (hs-advance! 2) + (when (= (hs-cur) "s") (hs-advance! 1)))) + (slice src start pos)))) + (define + read-string + (fn + (quote-char) + (let + ((chars (list))) + (hs-advance! 1) + (define + loop + (fn + () + (cond + (>= pos src-len) + nil + (= (hs-cur) "\\") + (do + (hs-advance! 1) + (when + (< pos src-len) + (let + ((ch (hs-cur))) + (cond + (= ch "n") + (append! chars "\n") + (= ch "t") + (append! chars "\t") + (= ch "\\") + (append! chars "\\") + (= ch quote-char) + (append! chars quote-char) + :else (do (append! chars "\\") (append! chars ch))) + (hs-advance! 1))) + (loop)) + (= (hs-cur) quote-char) + (hs-advance! 1) + :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) + (loop) + (join "" chars)))) + (define + read-template + (fn + () + (let + ((chars (list))) + (hs-advance! 1) + (define + loop + (fn + () + (cond + (>= pos src-len) + nil + (= (hs-cur) "`") + (hs-advance! 1) + (and + (= (hs-cur) "$") + (< (+ pos 1) src-len) + (= (hs-peek 1) "{")) + (do + (append! chars "${") + (hs-advance! 2) + (let + ((depth 1)) + (define + inner + (fn + () + (when + (and (< pos src-len) (> depth 0)) + (cond + (= (hs-cur) "{") + (do + (set! depth (+ depth 1)) + (append! chars (hs-cur)) + (hs-advance! 1) + (inner)) + (= (hs-cur) "}") + (do + (set! depth (- depth 1)) + (when (> depth 0) (append! chars (hs-cur))) + (hs-advance! 1) + (when (> depth 0) (inner))) + :else (do + (append! chars (hs-cur)) + (hs-advance! 1) + (inner)))))) + (inner)) + (append! chars "}") + (loop)) + :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) + (loop) + (join "" chars)))) + (define + read-selector + (fn + () + (let + ((chars (list))) + (hs-advance! 1) + (define + loop + (fn + () + (cond + (>= pos src-len) + nil + (and + (= (hs-cur) "/") + (< (+ pos 1) src-len) + (= (hs-peek 1) ">")) + (hs-advance! 2) + :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop))))) + (loop) + (join "" chars)))) + (define + read-class-name + (fn + (start) + (when + (and + (< pos src-len) + (or + (hs-ident-char? (hs-cur)) + (= (hs-cur) ":") + (= (hs-cur) "\\") + (= (hs-cur) "[") + (= (hs-cur) "]") + (= (hs-cur) "(") + (= (hs-cur) ")"))) + (when (= (hs-cur) "\\") (hs-advance! 1)) + (hs-advance! 1) + (read-class-name start)) + (slice src start pos))) + (define + hs-emit! + (fn + (type value start) + (append! tokens (hs-make-token type value start)))) + (define + scan! + (fn + () + (skip-ws!) + (when + (< pos src-len) + (let + ((ch (hs-cur)) (start pos)) + (cond + (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/")) + (do (hs-advance! 2) (skip-comment!) (scan!)) + (and + (= ch "<") + (< (+ pos 1) src-len) + (not (= (hs-peek 1) "=")) + (or + (hs-letter? (hs-peek 1)) + (= (hs-peek 1) ".") + (= (hs-peek 1) "#") + (= (hs-peek 1) "[") + (= (hs-peek 1) "*") + (= (hs-peek 1) ":"))) + (do (hs-emit! "selector" (read-selector) start) (scan!)) + (and + (= ch ".") + (< (+ pos 1) src-len) + (or + (hs-letter? (hs-peek 1)) + (= (hs-peek 1) "-") + (= (hs-peek 1) "_"))) + (do + (hs-advance! 1) + (hs-emit! "class" (read-class-name pos) start) + (scan!)) + (and + (= ch "#") + (< (+ pos 1) src-len) + (hs-ident-start? (hs-peek 1))) + (do + (hs-advance! 1) + (hs-emit! "id" (read-ident pos) start) + (scan!)) + (and + (= ch "@") + (< (+ pos 1) src-len) + (hs-ident-char? (hs-peek 1))) + (do + (hs-advance! 1) + (hs-emit! "attr" (read-ident pos) start) + (scan!)) + (and + (= ch "*") + (< (+ pos 1) src-len) + (hs-letter? (hs-peek 1))) + (do + (hs-advance! 1) + (hs-emit! "style" (read-ident pos) start) + (scan!)) + (and + (= ch ":") + (< (+ pos 1) src-len) + (hs-ident-start? (hs-peek 1))) + (do + (hs-advance! 1) + (hs-emit! "local" (read-ident pos) start) + (scan!)) + (or + (= ch "\"") + (and + (= ch "'") + (not + (and + (< (+ pos 1) src-len) + (= (hs-peek 1) "s") + (or + (>= (+ pos 2) src-len) + (not (hs-ident-char? (hs-peek 2)))))))) + (do (hs-emit! "string" (read-string ch) start) (scan!)) + (= ch "`") + (do (hs-emit! "template" (read-template) start) (scan!)) + (hs-digit? ch) + (do (hs-emit! "number" (read-number start) start) (scan!)) + (hs-ident-start? ch) + (do + (let + ((word (read-ident start))) + (hs-emit! + (if (hs-keyword? word) "keyword" "ident") + word + start)) + (scan!)) + (and + (or (= ch "=") (= ch "!") (= ch "<") (= ch ">")) + (< (+ pos 1) src-len) + (= (hs-peek 1) "=")) + (do + (hs-emit! "op" (str ch "=") start) + (hs-advance! 2) + (scan!)) + (and + (= ch "'") + (< (+ pos 1) src-len) + (= (hs-peek 1) "s") + (or + (>= (+ pos 2) src-len) + (not (hs-ident-char? (hs-peek 2))))) + (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!)) + (= ch "(") + (do + (hs-emit! "paren-open" "(" start) + (hs-advance! 1) + (scan!)) + (= ch ")") + (do + (hs-emit! "paren-close" ")" start) + (hs-advance! 1) + (scan!)) + (= ch "[") + (do + (hs-emit! "bracket-open" "[" start) + (hs-advance! 1) + (scan!)) + (= ch "]") + (do + (hs-emit! "bracket-close" "]" start) + (hs-advance! 1) + (scan!)) + (= ch "{") + (do + (hs-emit! "brace-open" "{" start) + (hs-advance! 1) + (scan!)) + (= ch "}") + (do + (hs-emit! "brace-close" "}" start) + (hs-advance! 1) + (scan!)) + (= ch ",") + (do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!)) + (= ch "+") + (do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!)) + (= ch "-") + (do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!)) + (= ch "/") + (do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!)) + (= ch "=") + (do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!)) + (= ch "<") + (do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!)) + (= ch ">") + (do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!)) + (= ch "!") + (do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!)) + (= ch ".") + (do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!)) + :else (do (hs-advance! 1) (scan!))))))) + (scan!) + (hs-emit! "eof" nil pos) + tokens))) \ No newline at end of file diff --git a/spec/tests/test-hyperscript-tokenizer.sx b/spec/tests/test-hyperscript-tokenizer.sx new file mode 100644 index 00000000..ab3e6c22 --- /dev/null +++ b/spec/tests/test-hyperscript-tokenizer.sx @@ -0,0 +1,314 @@ +;; _hyperscript tokenizer tests + +;; helper: get token types as a flat list +(define hs-types (fn (tokens) (map (fn (t) (get t "type")) tokens))) +(define hs-vals (fn (tokens) (map (fn (t) (get t "value")) tokens))) +(define hs-tok (fn (tokens n) (nth tokens n))) + +(defsuite + "hs-tokenize-basics" + (deftest + "empty input" + (let + ((tokens (hs-tokenize ""))) + (assert= 1 (len tokens)) + (assert= "eof" (get (first tokens) "type")))) + (deftest + "single keyword" + (let + ((tokens (hs-tokenize "on"))) + (assert= 2 (len tokens)) + (assert= "keyword" (get (first tokens) "type")) + (assert= "on" (get (first tokens) "value")))) + (deftest + "identifier" + (let + ((tokens (hs-tokenize "myVar"))) + (assert= "ident" (get (first tokens) "type")) + (assert= "myVar" (get (first tokens) "value")))) + (deftest + "keywords vs identifiers" + (let + ((tokens (hs-tokenize "on click add foo"))) + (assert= "keyword" (get (hs-tok tokens 0) "type")) + (assert= "ident" (get (hs-tok tokens 1) "type")) + (assert= "keyword" (get (hs-tok tokens 2) "type")) + (assert= "ident" (get (hs-tok tokens 3) "type")))) + (deftest + "whitespace skipped" + (let + ((tokens (hs-tokenize " on click "))) + (assert= 3 (len tokens)) + (assert= "on" (get (first tokens) "value"))))) + +(defsuite + "hs-tokenize-literals" + (deftest + "integer" + (let + ((t (first (hs-tokenize "42")))) + (assert= "number" (get t "type")) + (assert= "42" (get t "value")))) + (deftest + "decimal" + (let + ((t (first (hs-tokenize "3.14")))) + (assert= "number" (get t "type")) + (assert= "3.14" (get t "value")))) + (deftest + "number with ms unit" + (let + ((t (first (hs-tokenize "100ms")))) + (assert= "number" (get t "type")) + (assert= "100ms" (get t "value")))) + (deftest + "number with s unit" + (let + ((t (first (hs-tokenize "2s")))) + (assert= "number" (get t "type")) + (assert= "2s" (get t "value")))) + (deftest + "double-quoted string" + (let + ((t (first (hs-tokenize "\"hello world\"")))) + (assert= "string" (get t "type")) + (assert= "hello world" (get t "value")))) + (deftest + "single-quoted string" + (let + ((t (first (hs-tokenize "'foo'")))) + (assert= "string" (get t "type")) + (assert= "foo" (get t "value")))) + (deftest + "string with escapes" + (let + ((t (first (hs-tokenize "\"a\\nb\"")))) + (assert= "string" (get t "type")) + (assert= "a\nb" (get t "value")))) + (deftest + "template literal" + (let + ((t (first (hs-tokenize "`hello ${name}`")))) + (assert= "template" (get t "type")) + (assert= "hello ${name}" (get t "value"))))) + +(defsuite + "hs-tokenize-dom-refs" + (deftest + "class literal" + (let + ((t (first (hs-tokenize ".foo")))) + (assert= "class" (get t "type")) + (assert= "foo" (get t "value")))) + (deftest + "class with dashes" + (let + ((t (first (hs-tokenize ".foo--bar")))) + (assert= "class" (get t "type")) + (assert= "foo--bar" (get t "value")))) + (deftest + "id literal" + (let + ((t (first (hs-tokenize "#bar")))) + (assert= "id" (get t "type")) + (assert= "bar" (get t "value")))) + (deftest + "attribute ref" + (let + ((t (first (hs-tokenize "@foo")))) + (assert= "attr" (get t "type")) + (assert= "foo" (get t "value")))) + (deftest + "style ref" + (let + ((t (first (hs-tokenize "*color")))) + (assert= "style" (get t "type")) + (assert= "color" (get t "value")))) + (deftest + "element-scoped local" + (let + ((t (first (hs-tokenize ":myVar")))) + (assert= "local" (get t "type")) + (assert= "myVar" (get t "value")))) + (deftest + "CSS selector" + (let + ((t (first (hs-tokenize "
")))) + (assert= "selector" (get t "type")) + (assert= "p" (get t "value")))) + (deftest + "CSS selector complex" + (let + ((t (first (hs-tokenize "