;; Ruby tokenizer tests.
;; The file's last expression evaluates to (list rb-test-pass rb-test-fail);
;; one {:name :actual :expected} record per failing case is kept in
;; rb-test-fails.

;; Structural equality used by every assertion below.
;;   * values that are already `=` match immediately;
;;   * two dicts match when they have the same number of keys and every key
;;     of `a` is present in `b` with a deep-equal value;
;;   * two lists match when they have the same length and are deep-equal
;;     element by element;
;;   * anything else is a mismatch.
(define rb-deep=?
  (fn (a b)
    (cond
      ((= a b) true)
      ((and (dict? a) (dict? b))
       (let ((a-keys (keys a)))
         (if (= (len a-keys) (len (keys b)))
             (every?
               (fn (k) (and (has-key? b k) (rb-deep=? (get a k) (get b k))))
               a-keys)
             false)))
      ((and (list? a) (list? b))
       (let ((n (len a)))
         ;; Walk both lists by index and stop at the first mismatch.
         (define rb-de-loop
           (fn (i)
             (if (< i n)
                 (if (rb-deep=? (nth a i) (nth b i))
                     (rb-de-loop (+ i 1))
                     false)
                 true)))
         (if (= n (len b))
             (rb-de-loop 0)
             false)))
      (:else false))))

;; Running totals, mutated by rb-test.
(define rb-test-pass 0)
(define rb-test-fail 0)
(define rb-test-fails (list))

;; Record one assertion: on a deep match bump rb-test-pass, otherwise bump
;; rb-test-fail and append the case to rb-test-fails.
(define rb-test
  (fn (name actual expected)
    (if (not (rb-deep=? actual expected))
        (do
          (set! rb-test-fail (+ rb-test-fail 1))
          (append! rb-test-fails {:name name :actual actual :expected expected}))
        (set! rb-test-pass (+ rb-test-pass 1)))))

;; Helper: tokenize `src`, drop "newline" and "eof" tokens, and reduce each
;; remaining token to a {:value :type} dict.
(define rb-toks
  (fn (src)
    (let ((significant?
            (fn (tok)
              (let ((ty (get tok "type")))
                (and (not (= ty "newline")) (not (= ty "eof"))))))
          (summarize
            (fn (tok) {:value (get tok "value") :type (get tok "type")})))
      (map summarize (filter significant? (rb-tokenize src))))))

;; Helper: just the token types of `src`, after the rb-toks filtering.
(define rb-types
  (fn (src)
    (let ((toks (rb-toks src)))
      (map (fn (tok) (get tok "type")) toks))))

;; Helper: type of the first raw token (no newline/eof filtering).
(define rb-first-type
  (fn (src)
    (let ((first-tok (get (rb-tokenize src) 0)))
      (get first-tok "type"))))

;; Helper: value of the first raw token (no newline/eof filtering).
(define rb-first-value
  (fn (src)
    (let ((first-tok (get (rb-tokenize src) 0)))
      (get first-tok "value"))))

;; ── 1.
;; Keywords ──────────────────────────────────────────────────
;; Each reserved word must lex as exactly one keyword token whose value is
;; the word itself.  Test names are "keyword <word>".
(for-each
  (fn (word)
    (rb-test (str "keyword " word)
             (rb-toks word)
             (list {:value word :type "keyword"})))
  (list "def" "end" "class" "if" "while" "nil" "true" "false"
        "return" "yield" "begin" "rescue" "self" "super"))

;; ── 2. Identifiers ────────────────────────────────────────────────
;; Rows are (test-name source expected-type).  Each source must produce a
;; single token whose value is the source text; note that `defined?` is
;; expected to be a keyword, not an identifier.
(for-each
  (fn (row)
    (let ((src (nth row 1)))
      (rb-test (nth row 0)
               (rb-toks src)
               (list {:value src :type (nth row 2)}))))
  (list
    (list "ident simple"     "foo"      "ident")
    (list "ident underscore" "_foo"     "ident")
    (list "ident with digit" "foo2"     "ident")
    (list "ident predicate"  "empty?"   "ident")
    (list "ident bang"       "save!"    "ident")
    (list "defined?"         "defined?" "keyword")))

;; ── 3.
;; Constants ──────────────────────────────────────────────────
;; Rows are (test-name source); each source must lex as one "const" token
;; whose value is the source text.
(for-each
  (fn (row)
    (let ((src (nth row 1)))
      (rb-test (nth row 0) (rb-toks src) (list {:value src :type "const"}))))
  (list
    (list "const simple" "Foo")
    (list "const upcase" "MY_CONST")
    (list "const class"  "String")))

;; ── 4. Sigil variables ───────────────────────────────────────────
;; Rows are (expected-type source); the test name is the expected type and
;; the token value keeps its sigil.
(for-each
  (fn (row)
    (let ((ty (nth row 0))
          (src (nth row 1)))
      (rb-test ty (rb-toks src) (list {:value src :type ty}))))
  (list
    (list "ivar" "@name")
    (list "cvar" "@@count")
    (list "gvar" "$global")))

;; ── 5. Integers ───────────────────────────────────────────────────
;; Rows are (test-name source expected-value); integer tokens are expected
;; to carry the parsed number, whatever the radix or digit separators.
(for-each
  (fn (row)
    (rb-test (nth row 0) (rb-first-value (nth row 1)) (nth row 2)))
  (list
    (list "int decimal"    "42"     42)
    (list "int zero"       "0"      0)
    (list "int underscore" "1_000"  1000)
    (list "int hex"        "0xFF"   255)
    (list "int hex lower"  "0xff"   255)
    (list "int octal"      "0o17"   15)
    (list "int binary"     "0b1010" 10)))
(rb-test "int type" (rb-first-type "42") "int")

;; ── 6. Floats ─────────────────────────────────────────────────────
;; Rows are (type-test-name value-test-name source).  Unlike integers, a
;; float token's value is expected to be the source text itself (a string).
(for-each
  (fn (row)
    (let ((src (nth row 2)))
      (rb-test (nth row 0) (rb-first-type src) "float")
      (rb-test (nth row 1) (rb-first-value src) src)))
  (list
    (list "float simple" "float value"     "3.14")
    (list "float exp"    "float exp value" "1.5e10")))

;; ── 7.
;; Strings ────────────────────────────────────────────────────
;; String token values are expected to be the unquoted contents.  Double
;; quotes process escapes; single quotes only collapse an escaped backslash;
;; interpolation text is kept verbatim.
(rb-test "dq string" (rb-first-value "\"hello\"") "hello")
(rb-test "dq string type" (rb-first-type "\"hello\"") "string")
;; Rows are (test-name source expected-value).
(for-each
  (fn (row)
    (rb-test (nth row 0) (rb-first-value (nth row 1)) (nth row 2)))
  (list
    (list "sq string"           "'world'"       "world")
    (list "dq escape nl"        "\"a\\nb\""     "a\nb")
    (list "dq escape tab"       "\"a\\tb\""     "a\tb")
    (list "dq escape quote"     "\"a\\\"b\""    "a\"b")
    (list "sq no escape"        "'a\\nb'"       "a\\nb")
    (list "sq escape backslash" "'a\\\\'"       "a\\")
    (list "dq interp kept"      "\"#{x}\""      "#{x}")))

;; ── 8. Symbols ────────────────────────────────────────────────────
;; Symbol token values are expected to drop the leading colon (and any
;; quotes around a quoted symbol).
(rb-test "symbol simple" (rb-first-type ":foo") "symbol")
;; Rows are (test-name source expected-value).
(for-each
  (fn (row)
    (rb-test (nth row 0) (rb-first-value (nth row 1)) (nth row 2)))
  (list
    (list "symbol value"     ":foo"              "foo")
    (list "symbol predicate" ":empty?"           "empty?")
    (list "symbol dq"        ":\"hello world\""  "hello world")
    (list "symbol sq"        ":'hello'"          "hello")))

;; ── 9. %w and %i literals ────────────────────────────────────────
;; %w yields a "words" token and %i an "isymbols" token; in both cases the
;; value is expected to be the list of whitespace-separated items.
(rb-test "%w bracket"
         (rb-first-type "%w[a b c]")
         "words")
(rb-test "%w value"
         (rb-first-value "%w[a b c]")
         (list "a" "b" "c"))
(rb-test "%w paren"
         (rb-first-value "%w(x y)")
         (list "x" "y"))
(rb-test "%i bracket"
         (rb-first-type "%i[a b]")
         "isymbols")
(rb-test "%i value"
         (rb-first-value "%i[foo bar]")
         (list "foo" "bar"))

;; ── 10.
;; Punctuation ───────────────────────────────────────────────
;; Rows are (source expected-type); the test name is the expected type.
(for-each
  (fn (row)
    (let ((ty (nth row 1)))
      (rb-test ty (rb-first-type (nth row 0)) ty)))
  (list
    (list "."   "dot")
    (list ".."  "dotdot")
    (list "..." "dotdotdot")
    (list "::"  "dcolon")
    (list ","   "comma")
    (list ";"   "semi")
    (list "("   "lparen")
    (list ")"   "rparen")
    (list "["   "lbracket")
    (list "]"   "rbracket")
    (list "{"   "lbrace")
    (list "}"   "rbrace")
    (list "|"   "pipe")))

;; ── 11. Operators ─────────────────────────────────────────────────
;; Rows are (label operator); the test name is "op <label>" and the first
;; token's value is expected to be the whole operator text, which checks that
;; multi-character operators are not split.
(for-each
  (fn (row)
    (let ((op (nth row 1)))
      (rb-test (str "op " (nth row 0)) (rb-first-value op) op)))
  (list
    (list "plus"        "+")
    (list "minus"       "-")
    (list "star"        "*")
    (list "slash"       "/")
    (list "eq"          "=")
    (list "eqeq"        "==")
    (list "neq"         "!=")
    (list "lt"          "<")
    (list "gt"          ">")
    (list "lte"         "<=")
    (list "gte"         ">=")
    (list "spaceship"   "<=>")
    (list "tripleq"     "===")
    (list "match"       "=~")
    (list "nomatch"     "!~")
    (list "lshift"      "<<")
    (list "rshift"      ">>")
    (list "and"         "&&")
    (list "or"          "||")
    (list "power"       "**")
    (list "plus-eq"     "+=")
    (list "minus-eq"    "-=")
    (list "arrow"       "->")
    (list "hash-rocket" "=>")))

;; ── 12.
;; Comments ──────────────────────────────────────────────────
;; A comment is expected to contribute no tokens of its own.
(rb-test "comment skipped"
         (len (rb-toks "# this is a comment"))
         0)
(rb-test "comment mid-line"
         (rb-types "x = 1 # comment")
         (list "ident" "op" "int"))

;; ── 13. Multi-token sequences ─────────────────────────────────────
;; Rows are (test-name source expected-types), compared against the type
;; sequence with newline/eof tokens removed.
(for-each
  (fn (row)
    (rb-test (nth row 0) (rb-types (nth row 1)) (nth row 2)))
  (list
    (list "method call"      "foo.bar"         (list "ident" "dot" "ident"))
    (list "class def"        "class Foo"       (list "keyword" "const"))
    (list "method def"       "def greet(name)" (list "keyword" "ident" "lparen" "ident" "rparen"))
    (list "assignment"       "x = 42"          (list "ident" "op" "int"))
    (list "block params"     "|x, y|"          (list "pipe" "ident" "comma" "ident" "pipe"))
    (list "scope resolution" "Foo::Bar"        (list "const" "dcolon" "const"))
    (list "range"            "1..10"           (list "int" "dotdot" "int"))
    (list "exclusive range"  "1...10"          (list "int" "dotdotdot" "int"))))

;; ── 14. Line/col tracking ────────────────────────────────────────
;; Raw (unfiltered) tokens of a two-line source.  Index 2 is used for the
;; second word -- presumably index 1 is the newline token; confirm against
;; rb-tokenize.
(define rb-tok1 (get (rb-tokenize "hello\nworld") 0))
(define rb-tok2 (get (rb-tokenize "hello\nworld") 2))
(rb-test "line track start"
         (get rb-tok1 "line")
         1)
(rb-test "line track second"
         (get rb-tok2 "line")
         2)
(rb-test "col track start"
         (get rb-tok1 "col")
         1)

;; Value of the file: pass and fail counts.  Details of any failures remain
;; available in rb-test-fails.
(list rb-test-pass rb-test-fail)