From 5bcda5c88c3491066511b7d7adf2af54e8585ff3 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 10 May 2026 21:05:28 +0000 Subject: [PATCH] datalog: tokenizer raises on unterminated string + quoted atom (251/251) Bug: read-quoted ran to EOF silently when the closing quote was missing. The token's value was whatever text had been accumulated up to the end of input; the parser later saw an unexpected EOF, but its error message blamed the wrong location ("expected `)` got eof") and hid the real problem. Fix: read-quoted now raises with a message that distinguishes strings from quoted atoms and includes the tokenizer position at which the missing closing quote was detected (end of input). The escape-sequence handling and proper closing are unaffected. Adds 2 new tokenize tests. --- lib/datalog/scoreboard.json | 8 ++++---- lib/datalog/scoreboard.md | 4 ++-- lib/datalog/tests/tokenize.sx | 19 +++++++++++++++++++ lib/datalog/tokenizer.sx | 6 +++++- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 6c953f66..f36dbeba 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,10 +1,10 @@ { "lang": "datalog", - "total_passed": 249, + "total_passed": 251, "total_failed": 0, - "total": 249, + "total": 251, "suites": [ - {"name":"tokenize","passed":27,"failed":0,"total":27}, + {"name":"tokenize","passed":29,"failed":0,"total":29}, {"name":"parse","passed":22,"failed":0,"total":22}, {"name":"unify","passed":28,"failed":0,"total":28}, {"name":"eval","passed":36,"failed":0,"total":36}, @@ -16,5 +16,5 @@ {"name":"magic","passed":34,"failed":0,"total":34}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-10T20:59:17+00:00" + "generated": "2026-05-10T21:05:12+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index d40d724f..b7adc8c8 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,10 +1,10 @@ # datalog scoreboard -**249 / 249 passing** (0 failure(s)). 
+**251 / 251 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| -| tokenize | 27 | 27 | ok | +| tokenize | 29 | 29 | ok | | parse | 22 | 22 | ok | | unify | 28 | 28 | ok | | eval | 36 | 36 | ok | diff --git a/lib/datalog/tests/tokenize.sx b/lib/datalog/tests/tokenize.sx index 7ab73212..6bf190fc 100644 --- a/lib/datalog/tests/tokenize.sx +++ b/lib/datalog/tests/tokenize.sx @@ -118,6 +118,25 @@ "block comment" (dl-tk-types (dl-tokenize "/* a\nb */ x.")) (list "atom" "punct" "eof")) + ;; Unterminated string / quoted-atom must raise. + (dl-tk-test! + "unterminated string raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "\"unclosed")) + threw)) + true) + + (dl-tk-test! + "unterminated quoted atom raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "'unclosed")) + threw)) + true) + ;; Unterminated block comment must raise — previously it was ;; silently consumed to EOF. (dl-tk-test! diff --git a/lib/datalog/tokenizer.sx b/lib/datalog/tokenizer.sx index 5b7b48d2..e48ca7df 100644 --- a/lib/datalog/tokenizer.sx +++ b/lib/datalog/tokenizer.sx @@ -123,7 +123,11 @@ (fn () (cond - ((>= pos src-len) nil) + ((>= pos src-len) + (error + (str "Tokenizer: unterminated " + (if (= quote-char "'") "quoted atom" "string") + " (started near position " pos ")"))) ((= (cur) "\\") (do (advance! 1)