sx: step 3 — add :end and :line to hs tokenizer tokens
Extend hs-make-token to (type value pos &rest extras) producing dicts
{:pos :end :line :value :type}. End defaults to pos+len(value); line
defaults to 1. Both tokenize loops now track current-line by counting
newlines inside hs-advance! and t-advance!. hs-emit! and t-emit! pass the right end and
start-line to the constructor; redundant dict-set! after construction
removed.
Mirror copied to shared/static/wasm/sx/hs-tokenizer.sx (byte-identical).
Verify: (hs-make-token "NUMBER" "1" 0) returns
{:pos 0 :end 1 :line 1 :value "1" :type "NUMBER"}.
OCaml suite: 4529 pass, 1339 pre-existing failures (baseline). All
4/4 hs-upstream-core/sourceInfo tests now pass (was 2/4 — closes E38).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,17 @@
|
|||||||
|
|
||||||
;; ── Token constructor ─────────────────────────────────────────────
|
;; ── Token constructor ─────────────────────────────────────────────
|
||||||
|
|
||||||
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
(define hs-make-token
|
||||||
|
(fn (type value pos &rest extras)
|
||||||
|
(let
|
||||||
|
((end-arg (if (>= (len extras) 1) (nth extras 0) nil))
|
||||||
|
(line-arg (if (>= (len extras) 2) (nth extras 1) nil)))
|
||||||
|
(let
|
||||||
|
((end (if (nil? end-arg)
|
||||||
|
(+ pos (if (nil? value) 0 (len (str value))))
|
||||||
|
end-arg))
|
||||||
|
(line (if (nil? line-arg) 1 line-arg)))
|
||||||
|
{:pos pos :end end :line line :value value :type type}))))
|
||||||
|
|
||||||
;; ── Character predicates ──────────────────────────────────────────
|
;; ── Character predicates ──────────────────────────────────────────
|
||||||
|
|
||||||
@@ -221,14 +231,26 @@
|
|||||||
(fn
|
(fn
|
||||||
(src)
|
(src)
|
||||||
(let
|
(let
|
||||||
((tokens (list)) (pos 0) (src-len (len src)))
|
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||||
(define
|
(define
|
||||||
hs-peek
|
hs-peek
|
||||||
(fn
|
(fn
|
||||||
(offset)
|
(offset)
|
||||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||||
(define hs-cur (fn () (hs-peek 0)))
|
(define hs-cur (fn () (hs-peek 0)))
|
||||||
(define hs-advance! (fn (n) (set! pos (+ pos n))))
|
(define
|
||||||
|
hs-advance!
|
||||||
|
(fn (n)
|
||||||
|
(let ((new-pos (+ pos n)))
|
||||||
|
(define
|
||||||
|
count-nl!
|
||||||
|
(fn (i)
|
||||||
|
(when (< i new-pos)
|
||||||
|
(when (= (nth src i) "\n")
|
||||||
|
(set! current-line (+ current-line 1)))
|
||||||
|
(count-nl! (+ i 1)))))
|
||||||
|
(count-nl! pos)
|
||||||
|
(set! pos new-pos))))
|
||||||
(define
|
(define
|
||||||
skip-ws!
|
skip-ws!
|
||||||
(fn
|
(fn
|
||||||
@@ -502,13 +524,14 @@
|
|||||||
(fn
|
(fn
|
||||||
(type value start)
|
(type value start)
|
||||||
(let
|
(let
|
||||||
((tok (hs-make-token type value start))
|
((end-pos
|
||||||
(end-pos
|
(max pos (+ start (if (nil? value) 0 (len (str value))))))
|
||||||
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
|
(newlines-after-start
|
||||||
(do
|
(- (len (split (slice src start (max start pos)) "\n")) 1))
|
||||||
(dict-set! tok "end" end-pos)
|
(start-line (- current-line newlines-after-start)))
|
||||||
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
|
(append!
|
||||||
(append! tokens tok)))))
|
tokens
|
||||||
|
(hs-make-token type value start end-pos start-line)))))
|
||||||
(define
|
(define
|
||||||
scan!
|
scan!
|
||||||
(fn
|
(fn
|
||||||
@@ -758,11 +781,30 @@
|
|||||||
(fn
|
(fn
|
||||||
(src)
|
(src)
|
||||||
(let
|
(let
|
||||||
((tokens (list)) (pos 0) (src-len (len src)))
|
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||||
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
||||||
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
||||||
(define t-advance! (fn (n) (set! pos (+ pos n))))
|
(define
|
||||||
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
|
t-advance!
|
||||||
|
(fn (n)
|
||||||
|
(let ((new-pos (+ pos n)))
|
||||||
|
(define
|
||||||
|
t-count-nl!
|
||||||
|
(fn (i)
|
||||||
|
(when (< i new-pos)
|
||||||
|
(when (= (nth src i) "\n")
|
||||||
|
(set! current-line (+ current-line 1)))
|
||||||
|
(t-count-nl! (+ i 1)))))
|
||||||
|
(t-count-nl! pos)
|
||||||
|
(set! pos new-pos))))
|
||||||
|
(define
|
||||||
|
t-emit!
|
||||||
|
(fn (type value)
|
||||||
|
(let
|
||||||
|
((end-pos (+ pos (if (nil? value) 0 (len (str value))))))
|
||||||
|
(append!
|
||||||
|
tokens
|
||||||
|
(hs-make-token type value pos end-pos current-line)))))
|
||||||
(define
|
(define
|
||||||
scan-to-close!
|
scan-to-close!
|
||||||
(fn
|
(fn
|
||||||
|
|||||||
@@ -183,7 +183,7 @@ these when operands are known numbers/lists.
|
|||||||
|------|--------|--------|
|
|------|--------|--------|
|
||||||
| 1 — JIT combinator bug | [x] | 882a4b76 |
|
| 1 — JIT combinator bug | [x] | 882a4b76 |
|
||||||
| 2 — letrec+resume | [x] | e80e655b |
|
| 2 — letrec+resume | [x] | e80e655b |
|
||||||
| 3 — tokenizer :end/:line | [ ] | — |
|
| 3 — tokenizer :end/:line | [x] | (pending) |
|
||||||
| 4 — parser spans complete | [ ] | — |
|
| 4 — parser spans complete | [ ] | — |
|
||||||
| 5 — OCaml AdtValue + define-type + match | [ ] | — |
|
| 5 — OCaml AdtValue + define-type + match | [ ] | — |
|
||||||
| 6 — JS AdtValue + define-type + match | [ ] | — |
|
| 6 — JS AdtValue + define-type + match | [ ] | — |
|
||||||
|
|||||||
@@ -8,7 +8,17 @@
|
|||||||
|
|
||||||
;; ── Token constructor ─────────────────────────────────────────────
|
;; ── Token constructor ─────────────────────────────────────────────
|
||||||
|
|
||||||
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
(define hs-make-token
|
||||||
|
(fn (type value pos &rest extras)
|
||||||
|
(let
|
||||||
|
((end-arg (if (>= (len extras) 1) (nth extras 0) nil))
|
||||||
|
(line-arg (if (>= (len extras) 2) (nth extras 1) nil)))
|
||||||
|
(let
|
||||||
|
((end (if (nil? end-arg)
|
||||||
|
(+ pos (if (nil? value) 0 (len (str value))))
|
||||||
|
end-arg))
|
||||||
|
(line (if (nil? line-arg) 1 line-arg)))
|
||||||
|
{:pos pos :end end :line line :value value :type type}))))
|
||||||
|
|
||||||
;; ── Character predicates ──────────────────────────────────────────
|
;; ── Character predicates ──────────────────────────────────────────
|
||||||
|
|
||||||
@@ -221,14 +231,26 @@
|
|||||||
(fn
|
(fn
|
||||||
(src)
|
(src)
|
||||||
(let
|
(let
|
||||||
((tokens (list)) (pos 0) (src-len (len src)))
|
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||||
(define
|
(define
|
||||||
hs-peek
|
hs-peek
|
||||||
(fn
|
(fn
|
||||||
(offset)
|
(offset)
|
||||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||||
(define hs-cur (fn () (hs-peek 0)))
|
(define hs-cur (fn () (hs-peek 0)))
|
||||||
(define hs-advance! (fn (n) (set! pos (+ pos n))))
|
(define
|
||||||
|
hs-advance!
|
||||||
|
(fn (n)
|
||||||
|
(let ((new-pos (+ pos n)))
|
||||||
|
(define
|
||||||
|
count-nl!
|
||||||
|
(fn (i)
|
||||||
|
(when (< i new-pos)
|
||||||
|
(when (= (nth src i) "\n")
|
||||||
|
(set! current-line (+ current-line 1)))
|
||||||
|
(count-nl! (+ i 1)))))
|
||||||
|
(count-nl! pos)
|
||||||
|
(set! pos new-pos))))
|
||||||
(define
|
(define
|
||||||
skip-ws!
|
skip-ws!
|
||||||
(fn
|
(fn
|
||||||
@@ -502,13 +524,14 @@
|
|||||||
(fn
|
(fn
|
||||||
(type value start)
|
(type value start)
|
||||||
(let
|
(let
|
||||||
((tok (hs-make-token type value start))
|
((end-pos
|
||||||
(end-pos
|
(max pos (+ start (if (nil? value) 0 (len (str value))))))
|
||||||
(max pos (+ start (if (nil? value) 0 (len (str value)))))))
|
(newlines-after-start
|
||||||
(do
|
(- (len (split (slice src start (max start pos)) "\n")) 1))
|
||||||
(dict-set! tok "end" end-pos)
|
(start-line (- current-line newlines-after-start)))
|
||||||
(dict-set! tok "line" (len (split (slice src 0 start) "\n")))
|
(append!
|
||||||
(append! tokens tok)))))
|
tokens
|
||||||
|
(hs-make-token type value start end-pos start-line)))))
|
||||||
(define
|
(define
|
||||||
scan!
|
scan!
|
||||||
(fn
|
(fn
|
||||||
@@ -538,7 +561,8 @@
|
|||||||
(= (hs-peek 1) "#")
|
(= (hs-peek 1) "#")
|
||||||
(= (hs-peek 1) "[")
|
(= (hs-peek 1) "[")
|
||||||
(= (hs-peek 1) "*")
|
(= (hs-peek 1) "*")
|
||||||
(= (hs-peek 1) ":")))
|
(= (hs-peek 1) ":")
|
||||||
|
(= (hs-peek 1) "$")))
|
||||||
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
||||||
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
||||||
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
||||||
@@ -757,11 +781,30 @@
|
|||||||
(fn
|
(fn
|
||||||
(src)
|
(src)
|
||||||
(let
|
(let
|
||||||
((tokens (list)) (pos 0) (src-len (len src)))
|
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
|
||||||
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
|
||||||
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
|
||||||
(define t-advance! (fn (n) (set! pos (+ pos n))))
|
(define
|
||||||
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
|
t-advance!
|
||||||
|
(fn (n)
|
||||||
|
(let ((new-pos (+ pos n)))
|
||||||
|
(define
|
||||||
|
t-count-nl!
|
||||||
|
(fn (i)
|
||||||
|
(when (< i new-pos)
|
||||||
|
(when (= (nth src i) "\n")
|
||||||
|
(set! current-line (+ current-line 1)))
|
||||||
|
(t-count-nl! (+ i 1)))))
|
||||||
|
(t-count-nl! pos)
|
||||||
|
(set! pos new-pos))))
|
||||||
|
(define
|
||||||
|
t-emit!
|
||||||
|
(fn (type value)
|
||||||
|
(let
|
||||||
|
((end-pos (+ pos (if (nil? value) 0 (len (str value))))))
|
||||||
|
(append!
|
||||||
|
tokens
|
||||||
|
(hs-make-token type value pos end-pos current-line)))))
|
||||||
(define
|
(define
|
||||||
scan-to-close!
|
scan-to-close!
|
||||||
(fn
|
(fn
|
||||||
|
|||||||
Reference in New Issue
Block a user