sx: step 3 — add :end and :line to hs tokenizer tokens
Extend hs-make-token to (type value pos &rest extras) producing dicts
{:pos :end :line :value :type}. End defaults to pos+len(value); line
defaults to 1. Both tokenize loops now track current-line by counting
newlines inside their advance helpers (hs-advance! and t-advance!).
hs-emit! and t-emit! pass the computed end and line to the constructor;
the redundant dict-set! calls after construction are removed.
Mirror copied to shared/static/wasm/sx/hs-tokenizer.sx (byte-identical).
Verify: (hs-make-token "NUMBER" "1" 0) returns
{:pos 0 :end 1 :line 1 :value "1" :type "NUMBER"}.
OCaml suite: 4529 pass, 1339 pre-existing failures (baseline). All
4/4 hs-upstream-core/sourceInfo tests now pass (was 2/4 — closes E38).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,17 @@
|
||||
|
||||
;; ── Token constructor ─────────────────────────────────────────────
|
||||
|
||||
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
||||
;; Build a token dict {:pos :end :line :value :type}.
;;   type, value, pos — stored unchanged under :type, :value and :pos.
;;   extras           — optional trailing arguments: first is the end
;;                      offset, second is the line number.
;; A missing (or nil) end falls back to pos plus the length of the
;; stringified value (0 for a nil value); a missing (or nil) line falls
;; back to 1.
(define hs-make-token
  (fn (type value pos &rest extras)
    (let
      ((extra-count (len extras)))
      (let
        ((given-end (if (< extra-count 1) nil (nth extras 0)))
         (given-line (if (< extra-count 2) nil (nth extras 1))))
        (let
          ((end
             (if (nil? given-end)
               ;; Only measure the value when no explicit end was supplied.
               (+ pos (if (nil? value) 0 (len (str value))))
               given-end))
           (line (if (nil? given-line) 1 given-line)))
          {:pos pos :end end :line line :value value :type type})))))
|
||||
|
||||
;; ── Character predicates ──────────────────────────────────────────
|
||||
|
||||
@@ -221,14 +231,26 @@
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||
;; Look ahead `offset` characters from the cursor without consuming input.
;; Yields nil when the target index is at or beyond the end of src.
(define
  hs-peek
  (fn
    (offset)
    (let ((idx (+ pos offset)))
      (if (< idx src-len) (nth src idx) nil))))
|
||||
;; Character at the cursor (hs-peek with offset 0); nil once pos reaches src-len.
(define hs-cur (fn () (hs-peek 0)))
|
||||
(define hs-advance! (fn (n) (set! pos (+ pos n))))
|
||||
;; Move the cursor forward by n characters, incrementing current-line once
;; for every "\n" found in the span being skipped.
(define
  hs-advance!
  (fn (n)
    (let ((new-pos (+ pos n)))
      (define
        count-nl!
        (fn (i)
          ;; Stop at end of input as well as at new-pos, so an advance that
          ;; overshoots the source never indexes past the last character.
          (when (and (< i new-pos) (< i src-len))
            (when (= (nth src i) "\n")
              (set! current-line (+ current-line 1)))
            (count-nl! (+ i 1)))))
      (count-nl! pos)
      ;; pos itself may still move past src-len, exactly as before.
      (set! pos new-pos))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
@@ -502,13 +524,14 @@
|
||||
(fn
|
||||
(type value start)
|
||||
(let
|
||||
((tok (hs-make-token type value start))
|
||||
(end-pos
|
||||
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
|
||||
(do
|
||||
(dict-set! tok "end" end-pos)
|
||||
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
|
||||
(append! tokens tok)))))
|
||||
((end-pos
|
||||
(max pos (+ start (if (nil? value) 0 (len (str value))))))
|
||||
(newlines-after-start
|
||||
(- (len (split (slice src start (max start pos)) "\n")) 1))
|
||||
(start-line (- current-line newlines-after-start)))
|
||||
(append!
|
||||
tokens
|
||||
(hs-make-token type value start end-pos start-line)))))
|
||||
(define
|
||||
scan!
|
||||
(fn
|
||||
@@ -758,11 +781,30 @@
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||
;; Character at the cursor, or nil once pos has reached src-len.
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
||||
;; Character n positions ahead of the cursor, or nil when pos+n is at or past src-len.
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
||||
(define t-advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
|
||||
;; Move the cursor forward by n characters, incrementing current-line once
;; for every "\n" found in the span being skipped.
(define
  t-advance!
  (fn (n)
    (let ((new-pos (+ pos n)))
      (define
        t-count-nl!
        (fn (i)
          ;; Stop at end of input as well as at new-pos, so an advance that
          ;; overshoots the source never indexes past the last character.
          (when (and (< i new-pos) (< i src-len))
            (when (= (nth src i) "\n")
              (set! current-line (+ current-line 1)))
            (t-count-nl! (+ i 1)))))
      (t-count-nl! pos)
      ;; pos itself may still move past src-len, exactly as before.
      (set! pos new-pos))))
|
||||
;; Append a token that starts at the current cursor position.
;; The token's end is pos plus the length of the stringified value (0 for
;; a nil value) and its line is the current-line value at the time of the
;; call.
(define
  t-emit!
  (fn (type value)
    (let
      ((value-len (if (nil? value) 0 (len (str value)))))
      (append!
        tokens
        (hs-make-token type value pos (+ pos value-len) current-line)))))
|
||||
(define
|
||||
scan-to-close!
|
||||
(fn
|
||||
|
||||
@@ -183,7 +183,7 @@ these when operands are known numbers/lists.
|
||||
|------|--------|--------|
|
||||
| 1 — JIT combinator bug | [x] | 882a4b76 |
|
||||
| 2 — letrec+resume | [x] | e80e655b |
|
||||
| 3 — tokenizer :end/:line | [ ] | — |
|
||||
| 3 — tokenizer :end/:line | [x] | (pending) |
|
||||
| 4 — parser spans complete | [ ] | — |
|
||||
| 5 — OCaml AdtValue + define-type + match | [ ] | — |
|
||||
| 6 — JS AdtValue + define-type + match | [ ] | — |
|
||||
|
||||
@@ -8,7 +8,17 @@
|
||||
|
||||
;; ── Token constructor ─────────────────────────────────────────────
|
||||
|
||||
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
||||
;; Build a token dict {:pos :end :line :value :type}.
;;   type, value, pos — stored unchanged under :type, :value and :pos.
;;   extras           — optional trailing arguments: first is the end
;;                      offset, second is the line number.
;; A missing (or nil) end falls back to pos plus the length of the
;; stringified value (0 for a nil value); a missing (or nil) line falls
;; back to 1.
(define hs-make-token
  (fn (type value pos &rest extras)
    (let
      ((extra-count (len extras)))
      (let
        ((given-end (if (< extra-count 1) nil (nth extras 0)))
         (given-line (if (< extra-count 2) nil (nth extras 1))))
        (let
          ((end
             (if (nil? given-end)
               ;; Only measure the value when no explicit end was supplied.
               (+ pos (if (nil? value) 0 (len (str value))))
               given-end))
           (line (if (nil? given-line) 1 given-line)))
          {:pos pos :end end :line line :value value :type type})))))
|
||||
|
||||
;; ── Character predicates ──────────────────────────────────────────
|
||||
|
||||
@@ -221,14 +231,26 @@
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||
;; Look ahead `offset` characters from the cursor without consuming input.
;; Yields nil when the target index is at or beyond the end of src.
(define
  hs-peek
  (fn
    (offset)
    (let ((idx (+ pos offset)))
      (if (< idx src-len) (nth src idx) nil))))
|
||||
;; Character at the cursor (hs-peek with offset 0); nil once pos reaches src-len.
(define hs-cur (fn () (hs-peek 0)))
|
||||
(define hs-advance! (fn (n) (set! pos (+ pos n))))
|
||||
;; Move the cursor forward by n characters, incrementing current-line once
;; for every "\n" found in the span being skipped.
(define
  hs-advance!
  (fn (n)
    (let ((new-pos (+ pos n)))
      (define
        count-nl!
        (fn (i)
          ;; Stop at end of input as well as at new-pos, so an advance that
          ;; overshoots the source never indexes past the last character.
          (when (and (< i new-pos) (< i src-len))
            (when (= (nth src i) "\n")
              (set! current-line (+ current-line 1)))
            (count-nl! (+ i 1)))))
      (count-nl! pos)
      ;; pos itself may still move past src-len, exactly as before.
      (set! pos new-pos))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
@@ -502,13 +524,14 @@
|
||||
(fn
|
||||
(type value start)
|
||||
(let
|
||||
((tok (hs-make-token type value start))
|
||||
(end-pos
|
||||
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
|
||||
(do
|
||||
(dict-set! tok "end" end-pos)
|
||||
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
|
||||
(append! tokens tok)))))
|
||||
((end-pos
|
||||
(max pos (+ start (if (nil? value) 0 (len (str value))))))
|
||||
(newlines-after-start
|
||||
(- (len (split (slice src start (max start pos)) "\n")) 1))
|
||||
(start-line (- current-line newlines-after-start)))
|
||||
(append!
|
||||
tokens
|
||||
(hs-make-token type value start end-pos start-line)))))
|
||||
(define
|
||||
scan!
|
||||
(fn
|
||||
@@ -538,7 +561,8 @@
|
||||
(= (hs-peek 1) "#")
|
||||
(= (hs-peek 1) "[")
|
||||
(= (hs-peek 1) "*")
|
||||
(= (hs-peek 1) ":")))
|
||||
(= (hs-peek 1) ":")
|
||||
(= (hs-peek 1) "$")))
|
||||
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
||||
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
||||
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
||||
@@ -757,11 +781,30 @@
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||
;; Character at the cursor, or nil once pos has reached src-len.
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
||||
;; Character n positions ahead of the cursor, or nil when pos+n is at or past src-len.
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
||||
(define t-advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
|
||||
;; Move the cursor forward by n characters, incrementing current-line once
;; for every "\n" found in the span being skipped.
(define
  t-advance!
  (fn (n)
    (let ((new-pos (+ pos n)))
      (define
        t-count-nl!
        (fn (i)
          ;; Stop at end of input as well as at new-pos, so an advance that
          ;; overshoots the source never indexes past the last character.
          (when (and (< i new-pos) (< i src-len))
            (when (= (nth src i) "\n")
              (set! current-line (+ current-line 1)))
            (t-count-nl! (+ i 1)))))
      (t-count-nl! pos)
      ;; pos itself may still move past src-len, exactly as before.
      (set! pos new-pos))))
|
||||
;; Append a token that starts at the current cursor position.
;; The token's end is pos plus the length of the stringified value (0 for
;; a nil value) and its line is the current-line value at the time of the
;; call.
(define
  t-emit!
  (fn (type value)
    (let
      ((value-len (if (nil? value) 0 (len (str value)))))
      (append!
        tokens
        (hs-make-token type value pos (+ pos value-len) current-line)))))
|
||||
(define
|
||||
scan-to-close!
|
||||
(fn
|
||||
|
||||
Reference in New Issue
Block a user