From 023bc2d80c57a5f5cbbcb619a5c8dbc569358bf0 Mon Sep 17 00:00:00 2001 From: giles Date: Wed, 6 May 2026 22:14:10 +0000 Subject: [PATCH] =?UTF-8?q?sx:=20step=203=20=E2=80=94=20add=20:end=20and?= =?UTF-8?q?=20:line=20to=20hs=20tokenizer=20tokens?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend hs-make-token to (type value pos &rest extras) producing dicts {:pos :end :line :value :type}. End defaults to pos+len(value); line defaults to 1. Both tokenize loops now track current-line via newline counting in advance!. hs-emit! and t-emit! pass the right end and start-line to the constructor; redundant dict-set! after construction removed. Mirror copied to shared/static/wasm/sx/hs-tokenizer.sx (byte-identical). Verify: (hs-make-token "NUMBER" "1" 0) returns {:pos 0 :end 1 :line 1 :value "1" :type "NUMBER"}. OCaml suite: 4529 pass, 1339 pre-existing failures (baseline). All 4/4 hs-upstream-core/sourceInfo tests now pass (was 2/4 — closes E38). Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/hyperscript/tokenizer.sx | 68 ++++++++++++++++++++----- plans/sx-improvements.md | 2 +- shared/static/wasm/sx/hs-tokenizer.sx | 71 +++++++++++++++++++++------ 3 files changed, 113 insertions(+), 28 deletions(-) diff --git a/lib/hyperscript/tokenizer.sx b/lib/hyperscript/tokenizer.sx index a8ac771b..25992902 100644 --- a/lib/hyperscript/tokenizer.sx +++ b/lib/hyperscript/tokenizer.sx @@ -8,7 +8,17 @@ ;; ── Token constructor ───────────────────────────────────────────── -(define hs-make-token (fn (type value pos) {:pos pos :value value :type type})) +(define hs-make-token + (fn (type value pos &rest extras) + (let + ((end-arg (if (>= (len extras) 1) (nth extras 0) nil)) + (line-arg (if (>= (len extras) 2) (nth extras 1) nil))) + (let + ((end (if (nil? end-arg) + (+ pos (if (nil? value) 0 (len (str value)))) + end-arg)) + (line (if (nil? line-arg) 1 line-arg))) + {:pos pos :end end :line line :value value :type type})))) ;; ── Character predicates ────────────────────────────────────────── @@ -221,14 +231,26 @@ (fn (src) (let - ((tokens (list)) (pos 0) (src-len (len src))) + ((tokens (list)) (pos 0) (src-len (len src)) (current-line 1)) (define hs-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define hs-cur (fn () (hs-peek 0))) - (define hs-advance! (fn (n) (set! pos (+ pos n)))) + (define + hs-advance! + (fn (n) + (let ((new-pos (+ pos n))) + (define + count-nl! + (fn (i) + (when (< i new-pos) + (when (= (nth src i) "\n") + (set! current-line (+ current-line 1))) + (count-nl! (+ i 1))))) + (count-nl! pos) + (set! pos new-pos)))) (define skip-ws! (fn @@ -502,13 +524,14 @@ (fn (type value start) (let - ((tok (hs-make-token type value start)) - (end-pos - (max pos (+ start (if (nil? value) 0 (len (str value))))))) - (do - (dict-set! tok "end" end-pos) - (dict-set! tok "line" (len (split (slice src 0 start) "\n"))) - (append! tokens tok))))) + ((end-pos + (max pos (+ start (if (nil? value) 0 (len (str value)))))) + (newlines-after-start + (- (len (split (slice src start (max start pos)) "\n")) 1)) + (start-line (- current-line newlines-after-start))) + (append! + tokens + (hs-make-token type value start end-pos start-line))))) (define scan! (fn @@ -758,11 +781,30 @@ (fn (src) (let - ((tokens (list)) (pos 0) (src-len (len src))) + ((tokens (list)) (pos 0) (src-len (len src)) (current-line 1)) (define t-cur (fn () (if (< pos src-len) (nth src pos) nil))) (define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil))) - (define t-advance! (fn (n) (set! pos (+ pos n)))) - (define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos)))) + (define + t-advance! + (fn (n) + (let ((new-pos (+ pos n))) + (define + t-count-nl! + (fn (i) + (when (< i new-pos) + (when (= (nth src i) "\n") + (set! current-line (+ current-line 1))) + (t-count-nl! (+ i 1))))) + (t-count-nl! pos) + (set! pos new-pos)))) + (define + t-emit! + (fn (type value) + (let + ((end-pos (+ pos (if (nil? value) 0 (len (str value)))))) + (append! + tokens + (hs-make-token type value pos end-pos current-line))))) (define scan-to-close! (fn diff --git a/plans/sx-improvements.md b/plans/sx-improvements.md index 4caf5d4a..cb0835fc 100644 --- a/plans/sx-improvements.md +++ b/plans/sx-improvements.md @@ -183,7 +183,7 @@ these when operands are known numbers/lists. |------|--------|--------| | 1 — JIT combinator bug | [x] | 882a4b76 | | 2 — letrec+resume | [x] | e80e655b | -| 3 — tokenizer :end/:line | [ ] | — | +| 3 — tokenizer :end/:line | [x] | (pending) | | 4 — parser spans complete | [ ] | — | | 5 — OCaml AdtValue + define-type + match | [ ] | — | | 6 — JS AdtValue + define-type + match | [ ] | — | diff --git a/shared/static/wasm/sx/hs-tokenizer.sx b/shared/static/wasm/sx/hs-tokenizer.sx index 4824b51d..25992902 100644 --- a/shared/static/wasm/sx/hs-tokenizer.sx +++ b/shared/static/wasm/sx/hs-tokenizer.sx @@ -8,7 +8,17 @@ ;; ── Token constructor ───────────────────────────────────────────── -(define hs-make-token (fn (type value pos) {:pos pos :value value :type type})) +(define hs-make-token + (fn (type value pos &rest extras) + (let + ((end-arg (if (>= (len extras) 1) (nth extras 0) nil)) + (line-arg (if (>= (len extras) 2) (nth extras 1) nil))) + (let + ((end (if (nil? end-arg) + (+ pos (if (nil? value) 0 (len (str value)))) + end-arg)) + (line (if (nil? line-arg) 1 line-arg))) + {:pos pos :end end :line line :value value :type type})))) ;; ── Character predicates ────────────────────────────────────────── @@ -221,14 +231,26 @@ (fn (src) (let - ((tokens (list)) (pos 0) (src-len (len src))) + ((tokens (list)) (pos 0) (src-len (len src)) (current-line 1)) (define hs-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define hs-cur (fn () (hs-peek 0))) - (define hs-advance! (fn (n) (set! pos (+ pos n)))) + (define + hs-advance! + (fn (n) + (let ((new-pos (+ pos n))) + (define + count-nl! + (fn (i) + (when (< i new-pos) + (when (= (nth src i) "\n") + (set! current-line (+ current-line 1))) + (count-nl! (+ i 1))))) + (count-nl! pos) + (set! pos new-pos)))) (define skip-ws! (fn @@ -502,13 +524,14 @@ (fn (type value start) (let - ((tok (hs-make-token type value start)) - (end-pos - (max pos (+ start (if (nil? value) 0 (len (str value))))))) - (do - (dict-set! tok "end" end-pos) - (dict-set! tok "line" (len (split (slice src 0 start) "\n"))) - (append! tokens tok))))) + ((end-pos + (max pos (+ start (if (nil? value) 0 (len (str value)))))) + (newlines-after-start + (- (len (split (slice src start (max start pos)) "\n")) 1)) + (start-line (- current-line newlines-after-start))) + (append! + tokens + (hs-make-token type value start end-pos start-line))))) (define scan! (fn @@ -538,7 +561,8 @@ (= (hs-peek 1) "#") (= (hs-peek 1) "[") (= (hs-peek 1) "*") - (= (hs-peek 1) ":"))) + (= (hs-peek 1) ":") + (= (hs-peek 1) "$"))) (do (hs-emit! "selector" (read-selector) start) (scan!)) (and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) ".")) (do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!)) @@ -757,11 +781,30 @@ (fn (src) (let - ((tokens (list)) (pos 0) (src-len (len src))) + ((tokens (list)) (pos 0) (src-len (len src)) (current-line 1)) (define t-cur (fn () (if (< pos src-len) (nth src pos) nil))) (define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil))) - (define t-advance! (fn (n) (set! pos (+ pos n)))) - (define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos)))) + (define + t-advance! + (fn (n) + (let ((new-pos (+ pos n))) + (define + t-count-nl! + (fn (i) + (when (< i new-pos) + (when (= (nth src i) "\n") + (set! current-line (+ current-line 1))) + (t-count-nl! (+ i 1))))) + (t-count-nl! pos) + (set! pos new-pos)))) + (define + t-emit! + (fn (type value) + (let + ((end-pos (+ pos (if (nil? value) 0 (len (str value)))))) + (append! + tokens + (hs-make-token type value pos end-pos current-line))))) (define scan-to-close! (fn