datalog: tokenizer raises on unexpected characters (256/256)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 45s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 45s
Bug: characters not recognised by any branch of `scan!` (`?`, `!`, `#`, `@`, `&`, `|`, `\`, `^`, etc.) were silently consumed by the fallback branch `(else (do (advance! 1) (scan!)))`. A program containing a typo would therefore parse as a stripped version of itself with no warning — for example, `?(X).` became `(X).` and produced confusing downstream errors. Fix: the `else` branch now raises a clear "Tokenizer: unexpected character" error that reports the offending character and its position. Adds one new tokenize test (30 tokenize tests, 256 total).
This commit is contained in:
@@ -1,10 +1,10 @@
|
|||||||
{
|
{
|
||||||
"lang": "datalog",
|
"lang": "datalog",
|
||||||
"total_passed": 255,
|
"total_passed": 256,
|
||||||
"total_failed": 0,
|
"total_failed": 0,
|
||||||
"total": 255,
|
"total": 256,
|
||||||
"suites": [
|
"suites": [
|
||||||
{"name":"tokenize","passed":29,"failed":0,"total":29},
|
{"name":"tokenize","passed":30,"failed":0,"total":30},
|
||||||
{"name":"parse","passed":22,"failed":0,"total":22},
|
{"name":"parse","passed":22,"failed":0,"total":22},
|
||||||
{"name":"unify","passed":28,"failed":0,"total":28},
|
{"name":"unify","passed":28,"failed":0,"total":28},
|
||||||
{"name":"eval","passed":38,"failed":0,"total":38},
|
{"name":"eval","passed":38,"failed":0,"total":38},
|
||||||
@@ -16,5 +16,5 @@
|
|||||||
{"name":"magic","passed":36,"failed":0,"total":36},
|
{"name":"magic","passed":36,"failed":0,"total":36},
|
||||||
{"name":"demo","passed":21,"failed":0,"total":21}
|
{"name":"demo","passed":21,"failed":0,"total":21}
|
||||||
],
|
],
|
||||||
"generated": "2026-05-10T21:13:14+00:00"
|
"generated": "2026-05-10T21:16:53+00:00"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
# datalog scoreboard
|
# datalog scoreboard
|
||||||
|
|
||||||
**255 / 255 passing** (0 failure(s)).
|
**256 / 256 passing** (0 failure(s)).
|
||||||
|
|
||||||
| Suite | Passed | Total | Status |
|
| Suite | Passed | Total | Status |
|
||||||
|-------|--------|-------|--------|
|
|-------|--------|-------|--------|
|
||||||
| tokenize | 29 | 29 | ok |
|
| tokenize | 30 | 30 | ok |
|
||||||
| parse | 22 | 22 | ok |
|
| parse | 22 | 22 | ok |
|
||||||
| unify | 28 | 28 | ok |
|
| unify | 28 | 28 | ok |
|
||||||
| eval | 38 | 38 | ok |
|
| eval | 38 | 38 | ok |
|
||||||
|
|||||||
@@ -118,6 +118,18 @@
|
|||||||
"block comment"
|
"block comment"
|
||||||
(dl-tk-types (dl-tokenize "/* a\nb */ x."))
|
(dl-tk-types (dl-tokenize "/* a\nb */ x."))
|
||||||
(list "atom" "punct" "eof"))
|
(list "atom" "punct" "eof"))
|
||||||
|
;; Unexpected characters surface at tokenize time rather
|
||||||
|
;; than being silently consumed (previously `?(X)` parsed as
|
||||||
|
;; if the leading `?` weren't there).
|
||||||
|
(dl-tk-test!
|
||||||
|
"unexpected char raises"
|
||||||
|
(let ((threw false))
|
||||||
|
(do
|
||||||
|
(guard (e (#t (set! threw true)))
|
||||||
|
(dl-tokenize "?(X)"))
|
||||||
|
threw))
|
||||||
|
true)
|
||||||
|
|
||||||
;; Unterminated string / quoted-atom must raise.
|
;; Unterminated string / quoted-atom must raise.
|
||||||
(dl-tk-test!
|
(dl-tk-test!
|
||||||
"unterminated string raises"
|
"unterminated string raises"
|
||||||
|
|||||||
@@ -254,7 +254,9 @@
|
|||||||
(dl-emit! "op" "/" start)
|
(dl-emit! "op" "/" start)
|
||||||
(advance! 1)
|
(advance! 1)
|
||||||
(scan!)))
|
(scan!)))
|
||||||
(else (do (advance! 1) (scan!)))))))))
|
(else (error
|
||||||
|
(str "Tokenizer: unexpected character '" ch
|
||||||
|
"' at position " start)))))))))
|
||||||
(scan!)
|
(scan!)
|
||||||
(dl-emit! "eof" nil pos)
|
(dl-emit! "eof" nil pos)
|
||||||
tokens)))
|
tokens)))
|
||||||
|
|||||||
Reference in New Issue
Block a user