From 15eb133311b3e24fcdae399689b2fb1c91485487 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 25 Apr 2026 18:50:49 +0000 Subject: [PATCH] ruby: Phase 1 parser (+83 tests, 190 total) --- lib/ruby/parser.sx | 831 ++++++++++++++++++++++++++++++++++++++++ lib/ruby/test.sh | 37 +- lib/ruby/tests/parse.sx | 439 +++++++++++++++++++++ plans/ruby-on-sx.md | 5 +- 4 files changed, 1295 insertions(+), 17 deletions(-) create mode 100644 lib/ruby/parser.sx create mode 100644 lib/ruby/tests/parse.sx diff --git a/lib/ruby/parser.sx b/lib/ruby/parser.sx new file mode 100644 index 00000000..49a3b197 --- /dev/null +++ b/lib/ruby/parser.sx @@ -0,0 +1,831 @@ +;; Ruby parser: token list → AST. +;; Entry: (rb-parse tokens) or (rb-parse-str src) +;; AST nodes: dicts with :type plus type-specific fields. + +(define rb-parse + (fn (tokens) + (let ((pos 0) (tok-count (len tokens))) + + (define rb-p-cur + (fn () (rb-p-peek 0))) ;; bounds-checked via rb-p-peek: past the end yields a synthetic eof token + (define rb-p-peek + (fn (n) + (if (< (+ pos n) tok-count) + (nth tokens (+ pos n)) + {:type "eof" :value nil :line 0 :col 0}))) + (define rb-p-advance! + (fn () (set! pos (+ pos 1)))) + (define rb-p-type + (fn () (get (rb-p-cur) :type))) + (define rb-p-val + (fn () (get (rb-p-cur) :value))) + (define rb-p-sep? + (fn () (or (= (rb-p-type) "newline") (= (rb-p-type) "semi")))) + (define rb-p-skip-seps! + (fn () + (when (rb-p-sep?) + (do (rb-p-advance!) (rb-p-skip-seps!))))) + (define rb-p-skip-newlines! + (fn () + (when (= (rb-p-type) "newline") + (do (rb-p-advance!) (rb-p-skip-newlines!))))) + (define rb-p-expect! + (fn (type) + (if (= (rb-p-type) type) + (let ((tok (rb-p-cur))) + (rb-p-advance!) + tok) + {:type "error" + :msg (join "" (list "expected " type " got " (rb-p-type)))}))) + (define rb-p-expect-kw! + (fn (kw) + (when (and (= (rb-p-type) "keyword") (= (rb-p-val) kw)) + (rb-p-advance!)))) + + ;; Block: do |params| body end or { |params| body } + (define rb-p-parse-block-params + (fn () + (if (= (rb-p-type) "pipe") + (do + (rb-p-advance!) 
+ (let ((params (list))) + (define rb-p-bp-loop + (fn () + (when (not (or (= (rb-p-type) "pipe") (= (rb-p-type) "eof"))) + (do + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "**")) + (do + (rb-p-advance!) + (append! params {:type "param-kwrest" :name (rb-p-val)}) + (rb-p-advance!))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "*")) + (do + (rb-p-advance!) + (if (= (rb-p-type) "ident") + (do + (append! params {:type "param-rest" :name (rb-p-val)}) + (rb-p-advance!)) + (append! params {:type "param-rest" :name nil})))) + (:else + (do + (append! params {:type "param-req" :name (rb-p-val)}) + (rb-p-advance!)))) + (when (= (rb-p-type) "comma") (rb-p-advance!)) + (rb-p-bp-loop))))) + (rb-p-bp-loop) + (rb-p-expect! "pipe") + params)) + (list)))) + + (define rb-p-parse-block + (fn () + (cond + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "do")) + (do + (rb-p-advance!) + (let ((params (rb-p-parse-block-params))) + (rb-p-skip-seps!) + (let ((body (rb-p-parse-stmts (list "end")))) + (rb-p-expect-kw! "end") + {:type "block" :params params :body body})))) + ((= (rb-p-type) "lbrace") + (do + (rb-p-advance!) + (let ((params (rb-p-parse-block-params))) + (rb-p-skip-seps!) + (let ((body (rb-p-parse-stmts (list "rbrace")))) + (rb-p-expect! "rbrace") + {:type "block" :params params :body body})))) + (:else nil)))) + + ;; Method def params + (define rb-p-parse-def-params + (fn () + (let ((params (list))) + (define rb-p-dp-one + (fn () + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "&")) + (do + (rb-p-advance!) + (append! params {:type "param-block" :name (rb-p-val)}) + (rb-p-advance!))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "**")) + (do + (rb-p-advance!) + (append! params {:type "param-kwrest" :name (rb-p-val)}) + (rb-p-advance!))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "*")) + (do + (rb-p-advance!) + (if (= (rb-p-type) "ident") + (do + (append! params {:type "param-rest" :name (rb-p-val)}) + (rb-p-advance!)) + (append! 
params {:type "param-rest" :name nil})))) + ((and (= (rb-p-type) "ident") + (= (get (rb-p-peek 1) :type) "colon")) + (do + (let ((name (rb-p-val))) + (rb-p-advance!) + (rb-p-advance!) + (if (or (rb-p-sep?) (= (rb-p-type) "comma") + (= (rb-p-type) "rparen") (= (rb-p-type) "eof")) + (append! params {:type "param-kw" :name name :default nil}) + (append! params {:type "param-kw" :name name + :default (rb-p-parse-assign)}))))) + (:else + (let ((name (rb-p-val))) + (rb-p-advance!) + (if (and (= (rb-p-type) "op") (= (rb-p-val) "=")) + (do + (rb-p-advance!) + (append! params {:type "param-opt" :name name + :default (rb-p-parse-assign)})) + (append! params {:type "param-req" :name name}))))))) + (define rb-p-dp-loop + (fn () + (when (not (or (= (rb-p-type) "rparen") (rb-p-sep?) + (= (rb-p-type) "eof"))) + (do + (rb-p-dp-one) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!))) + (rb-p-dp-loop))))) + (rb-p-dp-loop) + params))) + + ;; def [recv.] name [(params)] body end + (define rb-p-parse-def + (fn () + (rb-p-advance!) + (let ((recv nil) (name nil)) + (cond + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "self") + (= (get (rb-p-peek 1) :type) "dot")) + (do + (set! recv {:type "self"}) + (rb-p-advance!) + (rb-p-advance!) + (set! name (rb-p-val)) + (rb-p-advance!))) + ((and (= (rb-p-type) "ident") + (= (get (rb-p-peek 1) :type) "dot")) + (do + (set! recv {:type "lvar" :name (rb-p-val)}) + (rb-p-advance!) + (rb-p-advance!) + (set! name (rb-p-val)) + (rb-p-advance!))) + (:else + (do + (set! name (rb-p-val)) + (rb-p-advance!)))) + (let ((params (list))) + (cond + ((= (rb-p-type) "lparen") + (do + (rb-p-advance!) + (rb-p-skip-newlines!) + (set! params (rb-p-parse-def-params)) + (rb-p-expect! "rparen"))) + ((not (or (rb-p-sep?) (= (rb-p-type) "eof"))) + (set! params (rb-p-parse-def-params))) + (:else nil)) + (rb-p-skip-seps!) + (let ((body (rb-p-parse-stmts (list "end")))) + (rb-p-expect-kw! 
"end") + {:type "method-def" :recv recv :name name + :params params :body body}))))) + + ;; class [< v, ...} + (define rb-p-parse-hash + (fn () + (rb-p-advance!) + (rb-p-skip-newlines!) + (let ((pairs (list))) + (define rb-p-hash-loop + (fn () + (when (not (or (= (rb-p-type) "rbrace") (= (rb-p-type) "eof"))) + (do + (let ((key nil) (val nil) (style nil)) + (cond + ((and (or (= (rb-p-type) "ident") (= (rb-p-type) "const")) + (= (get (rb-p-peek 1) :type) "colon")) + (do + (set! key {:type "lit-sym" :value (rb-p-val)}) + (set! style "colon") + (rb-p-advance!) + (rb-p-advance!))) + (:else + (do + (set! key (rb-p-parse-assign)) + (set! style "rocket") + (when (and (= (rb-p-type) "op") (= (rb-p-val) "=>")) + (rb-p-advance!))))) + (rb-p-skip-newlines!) + (set! val (rb-p-parse-assign)) + (append! pairs {:key key :val val :style style})) + (rb-p-skip-newlines!) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!))) + (rb-p-hash-loop))))) + (rb-p-hash-loop) + (rb-p-expect! "rbrace") + {:type "hash" :pairs pairs}))) + + ;; (a, *b, **c, &d) + (define rb-p-parse-args-parens + (fn () + (rb-p-advance!) + (rb-p-skip-newlines!) + (let ((args (list))) + (define rb-p-ap-loop + (fn () + (when (not (or (= (rb-p-type) "rparen") (= (rb-p-type) "eof"))) + (do + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "**")) + (do (rb-p-advance!) + (append! args {:type "dsplat" :value (rb-p-parse-assign)}))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "*")) + (do (rb-p-advance!) + (append! args {:type "splat" :value (rb-p-parse-assign)}))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "&")) + (do (rb-p-advance!) + (append! args {:type "block-pass" :value (rb-p-parse-assign)}))) + (:else (append! args (rb-p-parse-assign)))) + (rb-p-skip-newlines!) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!))) + (rb-p-ap-loop))))) + (rb-p-ap-loop) + (rb-p-expect! 
"rparen") + args))) + + ;; No-paren arg list up to sep/end-keyword + (define rb-p-parse-args-bare + (fn () + (let ((args (list)) (going true)) + (define rb-p-ab-loop + (fn () + (when (and going + (not (rb-p-sep?)) + (not (= (rb-p-type) "eof")) + (not (= (rb-p-type) "rparen")) + (not (= (rb-p-type) "rbracket")) + (not (= (rb-p-type) "rbrace")) + (not (and (= (rb-p-type) "keyword") + (contains? (list "end" "else" "elsif" "when" + "rescue" "ensure" "then" "do") + (rb-p-val))))) + (do + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "*")) + (do (rb-p-advance!) + (append! args {:type "splat" :value (rb-p-parse-assign)}))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "**")) + (do (rb-p-advance!) + (append! args {:type "dsplat" :value (rb-p-parse-assign)}))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "&")) + (do (rb-p-advance!) + (append! args {:type "block-pass" :value (rb-p-parse-assign)}))) + (:else (append! args (rb-p-parse-assign)))) + (if (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!) (rb-p-ab-loop)) + (set! going false)))))) + (rb-p-ab-loop) + args))) + + ;; Primary expression + (define rb-p-parse-primary + (fn () + (cond + ((= (rb-p-type) "int") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-int" :value v})) + ((= (rb-p-type) "float") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-float" :value v})) + ((= (rb-p-type) "string") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-str" :value v})) + ((= (rb-p-type) "symbol") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-sym" :value v})) + ((= (rb-p-type) "words") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-words" :elems v})) + ((= (rb-p-type) "isymbols") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "lit-isyms" :elems v})) + ((= (rb-p-type) "ivar") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "ivar" :name v})) + ((= (rb-p-type) "cvar") + (let ((v (rb-p-val))) (rb-p-advance!) {:type "cvar" :name v})) + ((= (rb-p-type) "gvar") + (let ((v (rb-p-val))) (rb-p-advance!) 
{:type "gvar" :name v})) + ((= (rb-p-type) "const") + (rb-p-parse-const-path)) + ((= (rb-p-type) "ident") + (let ((name (rb-p-val))) + (rb-p-advance!) + (if (= (rb-p-type) "lparen") + (let ((args (rb-p-parse-args-parens)) + (blk (rb-p-parse-block))) + {:type "send" :name name :args args :block blk}) + {:type "send" :name name :args (list) :block nil}))) + ((= (rb-p-type) "keyword") + (cond + ((= (rb-p-val) "nil") + (do (rb-p-advance!) {:type "lit-nil"})) + ((= (rb-p-val) "true") + (do (rb-p-advance!) {:type "lit-bool" :value true})) + ((= (rb-p-val) "false") + (do (rb-p-advance!) {:type "lit-bool" :value false})) + ((= (rb-p-val) "self") + (do (rb-p-advance!) {:type "self"})) + ((= (rb-p-val) "super") + (do + (rb-p-advance!) + (let ((args (if (= (rb-p-type) "lparen") + (rb-p-parse-args-parens) (list))) + (blk (rb-p-parse-block))) + {:type "send" :name "super" :args args :block blk}))) + (:else + {:type "error" + :msg (join "" (list "unexpected kw " (rb-p-val)))}))) + ((= (rb-p-type) "lbracket") + (rb-p-parse-array)) + ((= (rb-p-type) "lbrace") + (rb-p-parse-hash)) + ((= (rb-p-type) "lparen") + (do + (rb-p-advance!) + (rb-p-skip-seps!) + (let ((node (rb-p-parse-expr))) + (rb-p-skip-seps!) + (rb-p-expect! "rparen") + node))) + (:else + (let ((ty (rb-p-type)) (v (rb-p-val))) ;; capture the offending token before skipping it + (when (not (= ty "eof")) (rb-p-advance!)) ;; never step past eof, so callers' eof checks still fire + {:type "error" + :msg (join "" (list "unexpected " ty + " '" (or v "") "'"))}))))) + + ;; .method ::Const [index] chains + (define rb-p-parse-postfix + (fn () + (let ((node (rb-p-parse-primary))) + (define rb-p-pf-loop + (fn () + (cond + ((= (rb-p-type) "dot") + (do + (rb-p-advance!) + (let ((method (rb-p-val))) + (rb-p-advance!) + (let ((args (if (= (rb-p-type) "lparen") + (rb-p-parse-args-parens) (list))) + (blk (rb-p-parse-block))) + (set! node {:type "call" :recv node :method method + :args args :block blk}) + (rb-p-pf-loop))))) + ((= (rb-p-type) "dcolon") + (do + (rb-p-advance!) + (let ((name (rb-p-val))) + (rb-p-advance!) 
+ (if (= (rb-p-type) "lparen") + (let ((args (rb-p-parse-args-parens)) + (blk (rb-p-parse-block))) + (set! node {:type "call" :recv node :method name + :args args :block blk})) + (set! node {:type "const-path" :left node :name name})) + (rb-p-pf-loop)))) + ((= (rb-p-type) "lbracket") + (do + (rb-p-advance!) + (rb-p-skip-newlines!) + (let ((idxargs (list))) + (define rb-p-idx-loop + (fn () + (when (not (or (= (rb-p-type) "rbracket") (= (rb-p-type) "eof"))) + (do + (append! idxargs (rb-p-parse-assign)) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!))) + (rb-p-idx-loop))))) + (rb-p-idx-loop) + (rb-p-expect! "rbracket") + (set! node {:type "index" :recv node :args idxargs}) + (rb-p-pf-loop)))) + (:else nil)))) + (rb-p-pf-loop) + node))) + + (define rb-p-parse-unary + (fn () + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "!")) + (do (rb-p-advance!) + {:type "unop" :op "!" :value (rb-p-parse-unary)})) + ((and (= (rb-p-type) "op") (= (rb-p-val) "~")) + (do (rb-p-advance!) + {:type "unop" :op "~" :value (rb-p-parse-unary)})) + ((and (= (rb-p-type) "op") (= (rb-p-val) "-")) + (do (rb-p-advance!) + {:type "unop" :op "-" :value (rb-p-parse-unary)})) + ((and (= (rb-p-type) "op") (= (rb-p-val) "+")) + (do (rb-p-advance!) (rb-p-parse-unary))) + (:else (rb-p-parse-postfix))))) + + (define rb-p-parse-power + (fn () + (let ((node (rb-p-parse-unary))) + (if (and (= (rb-p-type) "op") (= (rb-p-val) "**")) + (do (rb-p-advance!) + {:type "binop" :op "**" :left node :right (rb-p-parse-power)}) + node)))) + + (define rb-p-parse-mul + (fn () + (let ((node (rb-p-parse-power))) + (define rb-p-mul-loop + (fn () + (if (and (= (rb-p-type) "op") + (or (= (rb-p-val) "*") (= (rb-p-val) "/") (= (rb-p-val) "%"))) + (let ((op (rb-p-val))) + (rb-p-advance!) + (set! 
node {:type "binop" :op op :left node :right (rb-p-parse-power)}) + (rb-p-mul-loop)) + node))) + (rb-p-mul-loop)))) + + (define rb-p-parse-add + (fn () + (let ((node (rb-p-parse-mul))) + (define rb-p-add-loop + (fn () + (if (and (= (rb-p-type) "op") + (or (= (rb-p-val) "+") (= (rb-p-val) "-"))) + (let ((op (rb-p-val))) + (rb-p-advance!) + (set! node {:type "binop" :op op :left node :right (rb-p-parse-mul)}) + (rb-p-add-loop)) + node))) + (rb-p-add-loop)))) + + (define rb-p-parse-shift + (fn () + (let ((node (rb-p-parse-add))) + (define rb-p-sh-loop + (fn () + (if (and (= (rb-p-type) "op") + (or (= (rb-p-val) "<<") (= (rb-p-val) ">>"))) + (let ((op (rb-p-val))) + (rb-p-advance!) + (set! node {:type "binop" :op op :left node :right (rb-p-parse-add)}) + (rb-p-sh-loop)) + node))) + (rb-p-sh-loop)))) + + (define rb-p-parse-bitand + (fn () + (let ((node (rb-p-parse-shift))) + (define rb-p-ba-loop + (fn () + (if (and (= (rb-p-type) "op") (= (rb-p-val) "&")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "&" :left node :right (rb-p-parse-shift)}) + (rb-p-ba-loop)) + node))) + (rb-p-ba-loop)))) + + ;; | is "pipe" token (not "op") + (define rb-p-parse-bitor + (fn () + (let ((node (rb-p-parse-bitand))) + (define rb-p-bo-loop + (fn () + (cond + ((= (rb-p-type) "pipe") + (do + (rb-p-advance!) + (set! node {:type "binop" :op "|" :left node :right (rb-p-parse-bitand)}) + (rb-p-bo-loop))) + ((and (= (rb-p-type) "op") (= (rb-p-val) "^")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "^" :left node :right (rb-p-parse-bitand)}) + (rb-p-bo-loop))) + (:else node)))) + (rb-p-bo-loop)))) + + (define rb-p-parse-comparison + (fn () + (let ((node (rb-p-parse-bitor))) + (if (and (= (rb-p-type) "op") + (contains? (list "==" "!=" "<" ">" "<=" ">=" + "<=>" "===" "=~" "!~") (rb-p-val))) + (let ((op (rb-p-val))) + (rb-p-advance!) 
+ {:type "binop" :op op :left node :right (rb-p-parse-bitor)}) + node)))) + + (define rb-p-parse-not + (fn () + (if (and (= (rb-p-type) "keyword") (= (rb-p-val) "not")) + (do (rb-p-advance!) + {:type "not" :value (rb-p-parse-not)}) + (rb-p-parse-comparison)))) + + (define rb-p-parse-and + (fn () + (let ((node (rb-p-parse-not))) + (define rb-p-and-loop + (fn () + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "&&")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "&&" :left node :right (rb-p-parse-not)}) + (rb-p-and-loop))) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "and")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "and" :left node :right (rb-p-parse-not)}) + (rb-p-and-loop))) + (:else node)))) + (rb-p-and-loop)))) + + (define rb-p-parse-or + (fn () + (let ((node (rb-p-parse-and))) + (define rb-p-or-loop + (fn () + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "||")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "||" :left node :right (rb-p-parse-and)}) + (rb-p-or-loop))) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "or")) + (do + (rb-p-advance!) + (set! node {:type "binop" :op "or" :left node :right (rb-p-parse-and)}) + (rb-p-or-loop))) + (:else node)))) + (rb-p-or-loop)))) + + (define rb-p-parse-range + (fn () + (let ((node (rb-p-parse-or))) + (cond + ((= (rb-p-type) "dotdot") + (do (rb-p-advance!) + {:type "range" :from node :to (rb-p-parse-or) :exclusive false})) + ((= (rb-p-type) "dotdotdot") + (do (rb-p-advance!) + {:type "range" :from node :to (rb-p-parse-or) :exclusive true})) + (:else node))))) + + (define rb-p-parse-assign + (fn () + (let ((node (rb-p-parse-range))) + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "=")) + (do (rb-p-advance!) + {:type "assign" :target node :value (rb-p-parse-assign)})) + ((and (= (rb-p-type) "op") + (contains? 
(list "+=" "-=" "*=" "/=" "%=" "**=" + "<<=" ">>=" "&=" "|=" "^=" "&&=" "||=") + (rb-p-val))) + (let ((op (substring (rb-p-val) 0 (- (len (rb-p-val)) 1)))) + (rb-p-advance!) + {:type "op-assign" :target node :op op :value (rb-p-parse-assign)})) + (:else node))))) + + (define rb-p-parse-expr + (fn () (rb-p-parse-assign))) + + ;; e, e, ... → single node or array + (define rb-p-parse-multi-val + (fn () + (let ((vals (list))) + (define rb-p-mv-loop + (fn () + (append! vals (rb-p-parse-assign)) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!) (rb-p-mv-loop))))) + (rb-p-mv-loop) + (if (= (len vals) 1) + (nth vals 0) + {:type "array" :elems vals})))) + + ;; a, b, *c = rhs + (define rb-p-parse-massign + (fn () + (let ((targets (list))) + (define rb-p-ma-loop + (fn () + (cond + ((and (= (rb-p-type) "op") (= (rb-p-val) "*")) + (do + (rb-p-advance!) + (if (= (rb-p-type) "ident") + (do + (append! targets {:type "splat-target" :name (rb-p-val)}) + (rb-p-advance!)) + (append! targets {:type "splat-target" :name nil})))) + ((= (rb-p-type) "ident") + (do (append! targets {:type "lvar" :name (rb-p-val)}) (rb-p-advance!))) + ((= (rb-p-type) "ivar") + (do (append! targets {:type "ivar" :name (rb-p-val)}) (rb-p-advance!))) + ((= (rb-p-type) "cvar") + (do (append! targets {:type "cvar" :name (rb-p-val)}) (rb-p-advance!))) + ((= (rb-p-type) "gvar") + (do (append! targets {:type "gvar" :name (rb-p-val)}) (rb-p-advance!))) + ((= (rb-p-type) "const") + (do (append! targets {:type "const" :name (rb-p-val)}) (rb-p-advance!))) + (:else nil)) + (when (= (rb-p-type) "comma") + (do (rb-p-advance!) (rb-p-skip-newlines!) (rb-p-ma-loop))))) + (rb-p-ma-loop) + (rb-p-advance!) 
+ {:type "massign" :targets targets :value (rb-p-parse-multi-val)}))) + + (define rb-p-parse-stmt + (fn () + (cond + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "def")) + (rb-p-parse-def)) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "class")) + (rb-p-parse-class)) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "module")) + (rb-p-parse-module)) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "return")) + (do (rb-p-advance!) + {:type "return" + :value (if (or (rb-p-sep?) (= (rb-p-type) "eof")) + nil (rb-p-parse-multi-val))})) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "yield")) + (do (rb-p-advance!) + {:type "yield" + :args (cond + ((= (rb-p-type) "lparen") (rb-p-parse-args-parens)) + ((or (rb-p-sep?) (= (rb-p-type) "eof")) (list)) + (:else (rb-p-parse-args-bare)))})) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "break")) + (do (rb-p-advance!) + {:type "break" + :value (if (or (rb-p-sep?) (= (rb-p-type) "eof")) + nil (rb-p-parse-expr))})) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "next")) + (do (rb-p-advance!) + {:type "next" + :value (if (or (rb-p-sep?) (= (rb-p-type) "eof")) + nil (rb-p-parse-expr))})) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "redo")) + (do (rb-p-advance!) {:type "redo"})) + ((and (= (rb-p-type) "keyword") (= (rb-p-val) "raise")) + (do (rb-p-advance!) + {:type "raise" + :value (if (or (rb-p-sep?) (= (rb-p-type) "eof")) + nil (rb-p-parse-expr))})) + ;; Massign: token followed by comma + ((and (or (= (rb-p-type) "ident") (= (rb-p-type) "ivar") + (= (rb-p-type) "cvar") (= (rb-p-type) "gvar") + (= (rb-p-type) "const")) + (= (get (rb-p-peek 1) :type) "comma")) + (rb-p-parse-massign)) + (:else + (let ((node (rb-p-parse-assign))) + (if (and (= (get node :type) "send") + (= (len (get node :args)) 0) + (nil? 
(get node :block))) + ;; Bare send: check for block or no-paren args + (cond + ;; Block immediately follows (do or {) + ((or (and (= (rb-p-type) "keyword") (= (rb-p-val) "do")) + (= (rb-p-type) "lbrace")) + (let ((blk (rb-p-parse-block))) + {:type "send" :name (get node :name) :args (list) :block blk})) + ;; No-paren args (stop before block/sep/end keywords) + ((and (not (rb-p-sep?)) + (not (= (rb-p-type) "eof")) + (not (= (rb-p-type) "op")) + (not (= (rb-p-type) "dot")) + (not (= (rb-p-type) "dcolon")) + (not (= (rb-p-type) "rparen")) + (not (= (rb-p-type) "rbracket")) + (not (= (rb-p-type) "rbrace")) + (not (= (rb-p-type) "lbrace")) + (not (and (= (rb-p-type) "keyword") + (contains? (list "end" "else" "elsif" "when" + "rescue" "ensure" "then" "do" + "and" "or" "not") + (rb-p-val))))) + (let ((args (rb-p-parse-args-bare)) + (blk (rb-p-parse-block))) + (if (> (len args) 0) + {:type "send" :name (get node :name) :args args :block blk} + node))) + (:else node)) + node)))))) + + (define rb-p-parse-stmts + (fn (terminators) + (let ((stmts (list))) + (define rb-p-at-term? + (fn () + (or (= (rb-p-type) "eof") + (and (= (rb-p-type) "keyword") + (contains? terminators (rb-p-val))) + (and (= (rb-p-type) "rbrace") + (contains? terminators "rbrace"))))) + (define rb-p-ps-loop + (fn () + (rb-p-skip-seps!) + (when (not (rb-p-at-term?)) + (let ((start pos)) ;; progress guard: a statement that consumed nothing would loop forever + (append! stmts (rb-p-parse-stmt)) + (when (= pos start) (rb-p-advance!)) (rb-p-skip-seps!) 
+ (rb-p-ps-loop))))) + (rb-p-ps-loop) + stmts))) + + {:type "program" :stmts (rb-p-parse-stmts (list))}))) + +(define rb-parse-str + (fn (src) (rb-parse (rb-tokenize src)))) diff --git a/lib/ruby/test.sh b/lib/ruby/test.sh index 861e1c62..6e3ad428 100755 --- a/lib/ruby/test.sh +++ b/lib/ruby/test.sh @@ -3,7 +3,7 @@ # Usage: # bash lib/ruby/test.sh # run all tests # bash lib/ruby/test.sh -v # verbose -# bash lib/ruby/test.sh tests/tokenizer.sx # single file +# bash lib/ruby/test.sh tests/parse.sx # single file set -euo pipefail cd "$(git rev-parse --show-toplevel)" @@ -39,32 +39,39 @@ FAILED_FILES=() for FILE in "${FILES[@]}"; do [ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; } TMPFILE=$(mktemp) - cat > "$TMPFILE" < "$TMPFILE" OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true) rm -f "$TMPFILE" - LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}') + # Extract epoch 4 result: (ok-len 4 N)\n or (ok 4 ) + LINE=$(printf '%s\n' "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}') if [ -z "$LINE" ]; then - LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \ - | sed -E 's/^\(ok 3 //; s/\)$//') + LINE=$(printf '%s\n' "$OUTPUT" \ + | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \ + | sed -E 's/^\(ok 4 //; s/\)$//') fi if [ -z "$LINE" ]; then echo "✗ $FILE: could not extract summary" - echo "$OUTPUT" | tail -20 + printf '%s\n' "$OUTPUT" | grep -v '^(ok ' | tail -10 TOTAL_FAIL=$((TOTAL_FAIL + 1)) FAILED_FILES+=("$FILE") continue fi - P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/') - F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/') + P=$(printf '%s\n' "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/') + F=$(printf '%s\n' "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/') TOTAL_PASS=$((TOTAL_PASS + P)) TOTAL_FAIL=$((TOTAL_FAIL + F)) if [ "$F" -gt 0 ]; then diff --git a/lib/ruby/tests/parse.sx b/lib/ruby/tests/parse.sx new file mode 100644 index 00000000..301b2407 --- /dev/null +++ 
b/lib/ruby/tests/parse.sx @@ -0,0 +1,439 @@ +;; Parser tests for Ruby 2.7 subset. + +(define rb-deep=? + (fn (a b) + (cond + ((= a b) true) + ((and (dict? a) (dict? b)) + (let ((ak (keys a)) (bk (keys b))) + (if (not (= (len ak) (len bk))) + false + (every? + (fn (k) + (and (has-key? b k) (rb-deep=? (get a k) (get b k)))) + ak)))) + ((and (list? a) (list? b)) + (if (not (= (len a) (len b))) + false + (let ((i 0) (ok true)) + (define rb-de-loop + (fn () + (when (and ok (< i (len a))) + (do + (when (not (rb-deep=? (nth a i) (nth b i))) + (set! ok false)) + (set! i (+ i 1)) + (rb-de-loop))))) + (rb-de-loop) + ok))) + (:else false)))) + +(define rb-test-pass 0) +(define rb-test-fail 0) +(define rb-test-fails (list)) + +(define rb-test + (fn (name actual expected) + (if (rb-deep=? actual expected) + (set! rb-test-pass (+ rb-test-pass 1)) + (do + (set! rb-test-fail (+ rb-test-fail 1)) + (append! rb-test-fails {:name name :actual actual :expected expected}))))) + +;; Shorthand: parse src and extract :stmts list +(define rb-p-stmts + (fn (src) + (get (rb-parse-str src) :stmts))) + +;; Shorthand: parse and get first statement +(define rb-p-first + (fn (src) + (nth (rb-p-stmts src) 0))) + +;; ── Literals ───────────────────────────────────────────────────────────────── + +(rb-test "int literal" + (rb-p-first "42") + {:type "lit-int" :value 42}) + +(rb-test "negative int" + (rb-p-first "-7") + {:type "unop" :op "-" :value {:type "lit-int" :value 7}}) + +(rb-test "float literal" + (rb-p-first "3.14") + {:type "lit-float" :value "3.14"}) + +(rb-test "string literal" + (rb-p-first "\"hello\"") + {:type "lit-str" :value "hello"}) + +(rb-test "symbol literal" + (rb-p-first ":foo") + {:type "lit-sym" :value "foo"}) + +(rb-test "nil literal" + (rb-p-first "nil") + {:type "lit-nil"}) + +(rb-test "true literal" + (rb-p-first "true") + {:type "lit-bool" :value true}) + +(rb-test "false literal" + (rb-p-first "false") + {:type "lit-bool" :value false}) + +(rb-test "self" + (rb-p-first 
"self") + {:type "self"}) + +(rb-test "%w[] words" + (rb-p-first "%w[a b c]") + {:type "lit-words" :elems (list "a" "b" "c")}) + +(rb-test "%i[] isymbols" + (rb-p-first "%i[x y]") + {:type "lit-isyms" :elems (list "x" "y")}) + +;; ── Variables ───────────────────────────────────────────────────────────────── + +(rb-test "local var / send" + (rb-p-first "x") + {:type "send" :name "x" :args (list) :block nil}) + +(rb-test "ivar" + (rb-p-first "@foo") + {:type "ivar" :name "@foo"}) + +(rb-test "cvar" + (rb-p-first "@@count") + {:type "cvar" :name "@@count"}) + +(rb-test "gvar" + (rb-p-first "$stdout") + {:type "gvar" :name "$stdout"}) + +(rb-test "constant" + (rb-p-first "Foo") + {:type "const" :name "Foo"}) + +(rb-test "const path" + (rb-p-first "Foo::Bar") + {:type "const-path" + :left {:type "const" :name "Foo"} + :name "Bar"}) + +(rb-test "triple const path" + (rb-p-first "A::B::C") + {:type "const-path" + :left {:type "const-path" + :left {:type "const" :name "A"} + :name "B"} + :name "C"}) + +;; ── Arrays and Hashes ───────────────────────────────────────────────────────── + +(rb-test "empty array" + (rb-p-first "[]") + {:type "array" :elems (list)}) + +(rb-test "array literal" + (rb-p-first "[1, 2, 3]") + {:type "array" :elems (list {:type "lit-int" :value 1} + {:type "lit-int" :value 2} + {:type "lit-int" :value 3})}) + +(rb-test "hash colon style" + (get (rb-p-first "{a: 1}") :type) + "hash") + +(rb-test "hash pair style" + (get (nth (get (rb-p-first "{a: 1}") :pairs) 0) :style) + "colon") + +(rb-test "hash symbol key" + (get (get (nth (get (rb-p-first "{a: 1}") :pairs) 0) :key) :value) + "a") + +;; ── Binary operators ────────────────────────────────────────────────────────── + +(rb-test "addition" + (rb-p-first "1 + 2") + {:type "binop" :op "+" + :left {:type "lit-int" :value 1} + :right {:type "lit-int" :value 2}}) + +(rb-test "subtraction" + (get (rb-p-first "a - b") :op) + "-") + +(rb-test "multiplication" + (get (rb-p-first "x * y") :op) + "*") + 
+(rb-test "precedence: * before +" ;; `*` binds tighter, so the product must nest under the right operand of `+`
+  (rb-p-first "1 + 2 * 3") ;; rb-p-first is defined outside this excerpt; presumably parses the source and returns its first statement node — confirm
+  {:type "binop" :op "+"
+   :left {:type "lit-int" :value 1}
+   :right {:type "binop" :op "*"
+           :left {:type "lit-int" :value 2}
+           :right {:type "lit-int" :value 3}}})
+
+(rb-test "power right-assoc" ;; `**` groups to the right: expected tree is 2 ** (3 ** 4)
+  (rb-p-first "2 ** 3 ** 4")
+  {:type "binop" :op "**"
+   :left {:type "lit-int" :value 2}
+   :right {:type "binop" :op "**"
+           :left {:type "lit-int" :value 3}
+           :right {:type "lit-int" :value 4}}})
+
+(rb-test "equality" ;; this and the two logical-operator tests inspect only :op, not the operand nodes
+  (get (rb-p-first "a == b") :op)
+  "==")
+
+(rb-test "logical and"
+  (get (rb-p-first "a && b") :op)
+  "&&")
+
+(rb-test "logical or"
+  (get (rb-p-first "a || b") :op)
+  "||")
+
+(rb-test "range inclusive" ;; two-dot range: full node compared, :exclusive expected false
+  (rb-p-first "1..5")
+  {:type "range"
+   :from {:type "lit-int" :value 1}
+   :to {:type "lit-int" :value 5}
+   :exclusive false})
+
+(rb-test "range exclusive" ;; three-dot range: only the :exclusive flag is checked
+  (get (rb-p-first "1...5") :exclusive)
+  true)
+
+;; ── Assignment ────────────────────────────────────────────────────────────────
+
+(rb-test "assign" ;; the bare identifier target is expected as an argument-less "send" node
+  (rb-p-first "x = 1")
+  {:type "assign"
+   :target {:type "send" :name "x" :args (list) :block nil}
+   :value {:type "lit-int" :value 1}})
+
+(rb-test "op-assign +="
+  (get (rb-p-first "x += 1") :type)
+  "op-assign")
+
+(rb-test "op-assign op" ;; :op is expected to hold the bare operator, without the trailing "="
+  (get (rb-p-first "x += 1") :op)
+  "+")
+
+(rb-test "massign"
+  (get (rb-p-first "a, b = 1, 2") :type)
+  "massign")
+
+(rb-test "massign targets" ;; one entry in :targets per left-hand name
+  (len (get (rb-p-first "a, b = 1, 2") :targets))
+  2)
+
+(rb-test "massign value array" ;; the comma-separated right-hand side is expected as a single "array" node
+  (get (get (rb-p-first "a, b = 1, 2") :value) :type)
+  "array")
+
+;; ── Method calls ──────────────────────────────────────────────────────────────
+
+(rb-test "call with parens" ;; receiver-less call: "send" node keyed by :name
+  (rb-p-first "foo(1, 2)")
+  {:type "send" :name "foo"
+   :args (list {:type "lit-int" :value 1}
+               {:type "lit-int" :value 2})
+   :block nil})
+
+(rb-test "chained call" ;; call on an explicit receiver: "call" node keyed by :method (next test)
+  (get (rb-p-first "obj.foo") :type)
+  "call")
+
+(rb-test "chained call method"
+  (get (rb-p-first "obj.foo") :method)
+  "foo")
+
+(rb-test "chained call with args"
+  (len (get (rb-p-first "obj.foo(1, 2)") :args))
+  2)
+
+(rb-test "no-paren call" ;; command call without parentheses is expected to give the same "send" type as the paren form
+  (get (rb-p-first "puts \"hello\"") :type)
+  "send")
+
+(rb-test "no-paren call name"
+  (get (rb-p-first "puts \"hello\"") :name)
+  "puts")
+
+(rb-test "no-paren call args" ;; the string literal is the single argument
+  (len (get (rb-p-first "puts \"hello\"") :args))
+  1)
+
+(rb-test "indexing"
+  (get (rb-p-first "a[0]") :type)
+  "index")
+
+;; ── Unary operators ───────────────────────────────────────────────────────────
+
+(rb-test "unary not" ;; full node compared: operand sits under :value
+  (rb-p-first "!x")
+  {:type "unop" :op "!"
+   :value {:type "send" :name "x" :args (list) :block nil}})
+
+(rb-test "unary minus" ;; only :op is checked here
+  (get (rb-p-first "-x") :op)
+  "-")
+
+;; ── Method def ────────────────────────────────────────────────────────────────
+
+(rb-test "empty method def"
+  (get (rb-p-first "def foo; end") :type)
+  "method-def")
+
+(rb-test "method def name" ;; :name is compared directly against the string "foo"
+  (get (rb-p-first "def foo; end") :name)
+  "foo")
+
+(rb-test "method def no params" ;; no parameter list at all still yields a :params collection of length 0
+  (len (get (rb-p-first "def foo; end") :params))
+  0)
+
+(rb-test "method def with params"
+  (len (get (rb-p-first "def foo(a, b); end") :params))
+  2)
+
+(rb-test "method def param-req" ;; plain positional parameter
+  (get (nth (get (rb-p-first "def foo(a); end") :params) 0) :type)
+  "param-req")
+
+(rb-test "method def param name"
+  (get (nth (get (rb-p-first "def foo(a); end") :params) 0) :name)
+  "a")
+
+(rb-test "method def optional param" ;; index 1 is `b=1`, the parameter with a default
+  (get (nth (get (rb-p-first "def foo(a, b=1); end") :params) 1) :type)
+  "param-opt")
+
+(rb-test "method def splat"
+  (get (nth (get (rb-p-first "def foo(*args); end") :params) 0) :type)
+  "param-rest")
+
+(rb-test "method def double splat"
+  (get (nth (get (rb-p-first "def foo(**opts); end") :params) 0) :type)
+  "param-kwrest")
+
+(rb-test "method def block param"
+  (get (nth (get (rb-p-first "def foo(&blk); end") :params) 0) :type)
+  "param-block")
+
+(rb-test "method def all param types" ;; one parameter of each shape in a single signature -> 5 entries
+  (len (get (rb-p-first "def foo(a, b=1, *c, **d, &e); end") :params))
+  5)
+
+(rb-test "method def singleton recv" ;; `def self.bar`: :recv is expected to be a node of type "self"
+  (get (get (rb-p-first "def self.bar; end") :recv) :type)
+  "self")
+
+(rb-test "method def body" ;; `1; 2` -> two entries in :body
+  (len (get (rb-p-first "def foo; 1; 2; end") :body))
+  2)
+
+;; ── Class def ─────────────────────────────────────────────────────────────────
+
+(rb-test "class def type"
+  (get (rb-p-first "class Foo; end") :type)
+  "class-def")
+
+(rb-test "class def name" ;; :name is itself a node; its own :name field holds the string
+  (get (get (rb-p-first "class Foo; end") :name) :name)
+  "Foo")
+
+(rb-test "class def no super" ;; without `< Super`, :super is expected to be nil
+  (nil? (get (rb-p-first "class Foo; end") :super))
+  true)
+
+(rb-test "class def with super"
+  (get (get (rb-p-first "class Foo < Bar; end") :super) :name)
+  "Bar")
+
+(rb-test "singleton class" ;; `class << self` is expected to get its own node type rather than "class-def"
+  (get (rb-p-first "class << self; end") :type)
+  "sclass")
+
+;; ── Module def ────────────────────────────────────────────────────────────────
+
+(rb-test "module def type"
+  (get (rb-p-first "module M; end") :type)
+  "module-def")
+
+(rb-test "module def name" ;; same nested :name shape as the class-def name test
+  (get (get (rb-p-first "module M; end") :name) :name)
+  "M")
+
+;; ── Blocks ────────────────────────────────────────────────────────────────────
+
+(rb-test "block do...end" ;; the block hangs off the call node's :block field
+  (get (get (rb-p-first "foo do |x| x end") :block) :type)
+  "block")
+
+(rb-test "block brace" ;; brace form is expected to give the same "block" type as do...end
+  (get (get (rb-p-first "foo { |x| x }") :block) :type)
+  "block")
+
+(rb-test "block params"
+  (len (get (get (rb-p-first "foo { |a, b| a }") :block) :params))
+  2)
+
+(rb-test "block no params" ;; no |...| section -> :params of length 0
+  (len (get (get (rb-p-first "foo { 42 }") :block) :params))
+  0)
+
+;; ── Control flow ──────────────────────────────────────────────────────────────
+
+(rb-test "return type"
+  (get (rb-p-first "return 1") :type)
+  "return")
+
+(rb-test "return value" ;; outer :value is the operand node, inner :value its literal 1
+  (get (get (rb-p-first "return 1") :value) :value)
+  1)
+
+(rb-test "return nil" ;; bare `return` is expected to carry a nil :value
+  (nil? (get (rb-p-first "return") :value))
+  true)
+
+(rb-test "yield type"
+  (get (rb-p-first "yield 1") :type)
+  "yield")
+
+(rb-test "break type"
+  (get (rb-p-first "break") :type)
+  "break")
+
+(rb-test "next type"
+  (get (rb-p-first "next") :type)
+  "next")
+
+(rb-test "redo type"
+  (get (rb-p-first "redo") :type)
+  "redo")
+
+;; ── Multi-statement program ───────────────────────────────────────────────────
+
+(rb-test "two statements" ;; rb-p-stmts is defined outside this excerpt; presumably returns every top-level statement — confirm
+  (len (rb-p-stmts "1\n2"))
+  2)
+
+(rb-test "semi-separated" ;; `;` is expected to separate statements just like a newline
+  (len (rb-p-stmts "1; 2; 3"))
+  3)
+
+(rb-test "class with method" ;; newline-separated class body holding one def -> :body of length 1
+  (let ((cls (rb-p-first "class Foo\n def bar\n 1\n end\nend")))
+    (len (get cls :body)))
+  1)
+
+(list rb-test-pass rb-test-fail) ;; file's final value; presumably the pass/fail tallies kept by rb-test — confirm against its definition
diff --git a/plans/ruby-on-sx.md b/plans/ruby-on-sx.md index 30d49960..96577326 100644 --- a/plans/ruby-on-sx.md +++ b/plans/ruby-on-sx.md @@ -52,10 +52,10 @@ Core mapping: ### Phase 1 — tokenizer + parser - [x] Tokenizer: keywords (`def end class module if unless while until do return yield begin rescue ensure case when then else elsif`), identifiers (lowercase = local/method, `@` = ivar, `@@` = cvar, `$` = global, uppercase = constant), numbers (int, float, `0x` `0o` `0b`, `_` separators), strings (`"…"` interpolation, `'…'` literal, `%w[a b c]`, `%i[a b c]`), symbols `:foo` `:"…"`, operators (`+ - * / % ** == != < > <= >= <=> === =~ !~ << >> & | ^ ~ ! && || and or not`), `:: . 
, ; ( ) [ ] { } -> => |`, comments `#` -- [ ] Parser: program is sequence of statements separated by newlines or `;`; method def `def name(args) … end`; class `class Foo < Bar … end`; module `module M … end`; block `do |a, b| … end` and `{ |a, b| … }`; call sugar (no parens), `obj.method`, `Mod::Const`; arg shapes (positional, default, splat `*args`, double-splat `**opts`, block `&blk`) +- [x] Parser: program is sequence of statements separated by newlines or `;`; method def `def name(args) … end`; class `class Foo < Bar … end`; module `module M … end`; block `do |a, b| … end` and `{ |a, b| … }`; call sugar (no parens), `obj.method`, `Mod::Const`; arg shapes (positional, default, splat `*args`, double-splat `**opts`, block `&blk`) - [ ] If/while/case expressions (return values), `unless`/`until`, postfix modifiers - [ ] Begin/rescue/ensure/retry, raise, raise with class+message -- [ ] Unit tests in `lib/ruby/tests/parse.sx` +- [x] Unit tests in `lib/ruby/tests/parse.sx` ### Phase 2 — object model + sequential eval - [ ] Class table bootstrap: `BasicObject`, `Object`, `Kernel`, `Module`, `Class`, `Numeric`, `Integer`, `Float`, `String`, `Symbol`, `Array`, `Hash`, `Range`, `NilClass`, `TrueClass`, `FalseClass`, `Proc`, `Method` @@ -117,6 +117,7 @@ Core mapping: _Newest first._ +- 2026-04-25: Phase 1 parser complete — `lib/ruby/parser.sx` (rb-parse/rb-parse-str) + `lib/ruby/tests/parse.sx` (83/83 tests). Program, method-def (all param shapes), class/module/sclass, blocks (do/brace), method calls (parens + no-parens + chains), const-path, assignment (=, op=, massign), binary/unary ops with precedence, array/hash literals, return/yield/break/next/redo/raise, indexing. - 2026-04-25: Phase 1 tokenizer complete — `lib/ruby/tokenizer.sx` + `lib/ruby/tests/tokenizer.sx` (107/107 tests). 
Keywords, identifiers (@ivar @@cvar $gvar), numbers (dec/hex/octal/binary/float), strings (dq with interpolation kept raw, sq), symbols, %w/%i literals, operators (all compound forms), punctuation, comments, line/col tracking. ## Blockers