sx: step 3 — add :end and :line to hs tokenizer tokens

Extend hs-make-token to (type value pos &rest extras) producing dicts
{:pos :end :line :value :type}. End defaults to pos+len(value); line
defaults to 1. Both tokenize loops now track current-line via newline
counting in advance!. hs-emit! and t-emit! pass the right end and
start-line to the constructor; redundant dict-set! after construction
removed.

Mirror copied to shared/static/wasm/sx/hs-tokenizer.sx (byte-identical).

Verify: (hs-make-token "NUMBER" "1" 0) returns
  {:pos 0 :end 1 :line 1 :value "1" :type "NUMBER"}.

OCaml suite: 4529 pass, 1339 pre-existing failures (baseline). All
4/4 hs-upstream-core/sourceInfo tests now pass (was 2/4 — closes E38).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-06 22:14:10 +00:00
parent ccf8a0fb90
commit 023bc2d80c
3 changed files with 113 additions and 28 deletions

View File

@@ -8,7 +8,17 @@
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
(define hs-make-token
  ;; Build a token dict {:pos :end :line :value :type}.
  ;;   type, value, pos  stored as given.
  ;;   extras            optional trailing arguments: first is the end
  ;;                     offset, second is the line number. A missing or
  ;;                     nil entry falls back to its default.
  ;; Defaults: end is pos plus the length of (str value), or pos itself
  ;; when value is nil; line is 1.
  (fn (type value pos &rest extras)
    (let
      ((extra-count (len extras)))
      (let
        ((end-override (if (< extra-count 1) nil (nth extras 0)))
         (line-override (if (< extra-count 2) nil (nth extras 1))))
        (let
          ((end
             (if (nil? end-override)
               (if (nil? value) pos (+ pos (len (str value))))
               end-override))
           (line (if (nil? line-override) 1 line-override)))
          {:pos pos :end end :line line :value value :type type})))))
;; ── Character predicates ──────────────────────────────────────────
@@ -221,14 +231,26 @@
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
(define
  hs-peek
  ;; Look ahead without consuming: returns the element of src at
  ;; pos + offset, or nil when that index is not below src-len.
  (fn
    (offset)
    (let
      ((idx (+ pos offset)))
      (if (< idx src-len) (nth src idx) nil))))
;; Element of src at the current cursor position; delegates to hs-peek
;; with offset 0, so it yields nil once pos is no longer below src-len.
(define hs-cur (fn () (hs-peek 0)))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
  hs-advance!
  ;; Move the cursor forward by n positions. Before pos is updated, every
  ;; index in [pos, pos + n) is inspected and current-line is incremented
  ;; once for each "\n" found there, so current-line always reflects the
  ;; line of the new cursor position.
  (fn (n)
    (let ((target (+ pos n)))
      (define
        tally-newlines!
        (fn (idx)
          (if (>= idx target)
            nil
            (do
              (if (= (nth src idx) "\n")
                (set! current-line (+ current-line 1))
                nil)
              (tally-newlines! (+ idx 1))))))
      (tally-newlines! pos)
      (set! pos target))))
(define
skip-ws!
(fn
@@ -502,13 +524,14 @@
(fn
(type value start)
(let
((tok (hs-make-token type value start))
(end-pos
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
(do
(dict-set! tok "end" end-pos)
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
(append! tokens tok)))))
((end-pos
(max pos (+ start (if (nil? value) 0 (len (str value))))))
(newlines-after-start
(- (len (split (slice src start (max start pos)) "\n")) 1))
(start-line (- current-line newlines-after-start)))
(append!
tokens
(hs-make-token type value start end-pos start-line)))))
(define
scan!
(fn
@@ -538,7 +561,8 @@
(= (hs-peek 1) "#")
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(= (hs-peek 1) ":")
(= (hs-peek 1) "$")))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
@@ -757,11 +781,30 @@
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
;; Element of src at the cursor, or nil when pos is not below src-len.
(define
  t-cur
  (fn ()
    (if (>= pos src-len) nil (nth src pos))))
;; Look ahead n positions from the cursor without consuming; nil when
;; pos + n is not below src-len.
(define
  t-peek
  (fn (n)
    (let
      ((idx (+ pos n)))
      (if (< idx src-len) (nth src idx) nil))))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
  t-advance!
  ;; Move the cursor forward by n positions. Each "\n" found at an index
  ;; in [pos, pos + n) increments current-line before pos is updated.
  (fn (n)
    (let ((target (+ pos n)))
      (define
        t-tally-newlines!
        (fn (idx)
          (if (>= idx target)
            nil
            (do
              (if (= (nth src idx) "\n")
                (set! current-line (+ current-line 1))
                nil)
              (t-tally-newlines! (+ idx 1))))))
      (t-tally-newlines! pos)
      (set! pos target))))
(define
  t-emit!
  ;; Append a token to tokens. The token starts at the current pos, ends
  ;; at pos plus the length of (str value) (or at pos when value is nil),
  ;; and carries the current-line value at the time of the call.
  (fn (type value)
    (let
      ((width (if (nil? value) 0 (len (str value)))))
      (let
        ((tok (hs-make-token type value pos (+ pos width) current-line)))
        (append! tokens tok)))))
(define
scan-to-close!
(fn