lua: minimal Lua pattern engine for string.find (classes/anchors/quantifiers)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-24 23:05:48 +00:00
parent 3ec52d4556
commit bd0377b6a3
4 changed files with 210 additions and 44 deletions

View File

@@ -696,6 +696,163 @@
(dict-set! coroutine "wrap" lua-coroutine-wrap)
;; ── string library ────────────────────────────────────────────
;; ── Lua patterns (minimal) ────────────────────────────────────
;; Predicates for single-char classes
(define
lua-pat-class-match
(fn (class c)
(cond
((= class "d") (and (>= c "0") (<= c "9")))
((= class "D") (not (and (>= c "0") (<= c "9"))))
((= class "a") (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
((= class "A") (not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
((= class "s") (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
((= class "S") (not (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
((= class "w")
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9"))))
((= class "W")
(not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")))))
((= class "l") (and (>= c "a") (<= c "z")))
((= class "L") (not (and (>= c "a") (<= c "z"))))
((= class "u") (and (>= c "A") (<= c "Z")))
((= class "U") (not (and (>= c "A") (<= c "Z"))))
((= class "p") (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~"))))
((= class "P") (not (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~")))))
((= class "c") (or (< c " ") (= c "\127")))
((= class "C") (not (or (< c " ") (= c "\127"))))
((= class "x")
(or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F"))))
((= class "X")
(not (or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F")))))
(else (= c class)))))
;; Match a single "pattern atom" (char or class) at src position — returns true or false.
;; pat-pos is position of the atom start. Returns the atom's length in pat (1 or 2).
(define
lua-pat-atom-len
(fn (pat pat-pos)
(cond
((>= pat-pos (len pat)) 0)
((= (char-at pat pat-pos) "%") 2)
(else 1))))
(define
lua-pat-atom-match
(fn (pat pat-pos s s-pos)
(cond
((>= s-pos (len s)) false)
((>= pat-pos (len pat)) false)
(else
(let ((pc (char-at pat pat-pos)) (sc (char-at s s-pos)))
(cond
((= pc ".") true)
((= pc "%")
(cond
((>= (+ pat-pos 1) (len pat)) false)
(else (lua-pat-class-match (char-at pat (+ pat-pos 1)) sc))))
(else (= pc sc))))))))
;; Match pattern against string starting at s-pos and pat-pos. Returns end-pos (inclusive+1) or -1 on no-match.
(define
lua-pat-match
(fn (pat pat-pos s s-pos)
(cond
((>= pat-pos (len pat)) s-pos)
((and (= (char-at pat pat-pos) "$") (= (+ pat-pos 1) (len pat)))
(if (= s-pos (len s)) s-pos -1))
(else
(let
((alen (lua-pat-atom-len pat pat-pos)))
(let
((next-pat (+ pat-pos alen)))
(let
((qc (if (< next-pat (len pat)) (char-at pat next-pat) "")))
(cond
((= qc "*") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 0))
((= qc "+") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 1))
((= qc "-") (lua-pat-match-lazy pat pat-pos alen s s-pos (+ next-pat 1)))
((= qc "?")
(let ((tried (if (lua-pat-atom-match pat pat-pos s s-pos)
(lua-pat-match pat (+ next-pat 1) s (+ s-pos 1))
-1)))
(if (>= tried 0) tried (lua-pat-match pat (+ next-pat 1) s s-pos))))
(else
(cond
((lua-pat-atom-match pat pat-pos s s-pos)
(lua-pat-match pat next-pat s (+ s-pos 1)))
(else -1)))))))))))
(define
lua-pat-match-greedy
(fn (pat atom-pos atom-len s s-pos rest-pat-pos min-count)
(let ((count 0) (i s-pos))
(begin
(define
count-loop
(fn ()
(when (lua-pat-atom-match pat atom-pos s i)
(begin (set! i (+ i 1)) (set! count (+ count 1)) (count-loop)))))
(count-loop)
(let ((best -1))
(begin
(define
try-loop
(fn (k)
(when (and (< best 0) (>= k min-count))
(let ((r (lua-pat-match pat rest-pat-pos s (+ s-pos k))))
(cond
((>= r 0) (set! best r))
(else (try-loop (- k 1))))))))
(try-loop count)
best))))))
(define
lua-pat-match-lazy
(fn (pat atom-pos atom-len s s-pos rest-pat-pos)
(let ((best -1) (i s-pos))
(begin
(define
try-loop
(fn ()
(when (< best 0)
(let ((r (lua-pat-match pat rest-pat-pos s i)))
(cond
((>= r 0) (set! best r))
((and (< i (len s)) (lua-pat-atom-match pat atom-pos s i))
(begin (set! i (+ i 1)) (try-loop)))
(else (set! best -2)))))))
(try-loop)
(if (= best -2) -1 best)))))
;; Top-level find: return (start-index-0based . end-index) or nil on no match.
;; If pat starts with ^, anchor to init. Otherwise scan.
(define
lua-pat-find
(fn (pat s init)
(let
((anchored (and (> (len pat) 0) (= (char-at pat 0) "^"))))
(let
((start-pat (if anchored 1 0)))
(cond
(anchored
(let ((end (lua-pat-match pat start-pat s init)))
(if (>= end 0) (list init end) nil)))
(else
(let ((i init) (result nil))
(begin
(define
scan
(fn ()
(when (and (= result nil) (<= i (len s)))
(let ((end (lua-pat-match pat 0 s i)))
(cond
((>= end 0) (set! result (list i end)))
(else (begin (set! i (+ i 1)) (scan))))))))
(scan)
result))))))))
(define string {})
(define __ascii-32-126 " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")
@@ -784,20 +941,28 @@
(fn (&rest args)
(let ((s (first args))
(pat (nth args 1))
(init (if (> (len args) 2) (nth args 2) 1)))
(let ((start-i (cond
((< init 0) (+ (len s) init 1))
((= init 0) 1)
(else init))))
(let ((sub (if (<= start-i 1) s (substring s (- start-i 1) (len s)))))
(let ((idx (index-of sub pat)))
(cond
((< idx 0) nil)
(else
(list
(quote lua-multi)
(+ start-i idx)
(+ start-i idx (len pat) -1))))))))))
(init (if (> (len args) 2) (nth args 2) 1))
(plain (if (> (len args) 3) (nth args 3) false)))
(let ((start-i0 (cond
((< init 0) (let ((v (+ (len s) init))) (if (< v 0) 0 v)))
((= init 0) 0)
(else (- init 1)))))
(cond
((lua-truthy? plain)
(let ((sub (if (<= start-i0 0) s (substring s start-i0 (len s)))))
(let ((idx (index-of sub pat)))
(cond
((< idx 0) nil)
(else
(list
(quote lua-multi)
(+ start-i0 idx 1)
(+ start-i0 idx (len pat))))))))
(else
(let ((r (lua-pat-find pat s start-i0)))
(cond
((= r nil) nil)
(else (list (quote lua-multi) (+ (first r) 1) (nth r 1)))))))))))
;; Literal-only string.match: returns matched substring or nil (no captures since no pattern).
(define

View File

@@ -51,7 +51,7 @@
"name": "attrib.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"lua: module 'C' not found\\\\\\\"\\",
"ms": 6342
"ms": 6112
},
{
"name": "big.lua",
@@ -63,7 +63,7 @@
"name": "calls.lua",
"status": "fail",
"reason": "undefined symbol: fat\\",
"ms": 5134
"ms": 4800
},
{
"name": "checktable.lua",
@@ -75,7 +75,7 @@
"name": "closure.lua",
"status": "timeout",
"reason": "per-test timeout",
"ms": 8007
"ms": 8008
},
{
"name": "code.lua",
@@ -87,7 +87,7 @@
"name": "constructs.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"lua: attempt to compare incompat",
"ms": 7182
"ms": 6618
},
{
"name": "db.lua",
@@ -99,13 +99,13 @@
"name": "errors.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 3464
"ms": 3080
},
{
"name": "events.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 7726
"ms": 7192
},
{
"name": "files.lua",
@@ -123,13 +123,13 @@
"name": "literals.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 1940
"ms": 1777
},
{
"name": "locals.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"lua: attempt to call non-functio",
"ms": 1759
"ms": 1644
},
{
"name": "main.lua",
@@ -141,43 +141,43 @@
"name": "math.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 4487
"ms": 3999
},
{
"name": "nextvar.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 7441
"ms": 6948
},
{
"name": "pm.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 6440
"ms": 6326
},
{
"name": "sort.lua",
"status": "timeout",
"reason": "per-test timeout",
"ms": 8007
"ms": 8008
},
{
"name": "strings.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 4243
"ms": 3975
},
{
"name": "vararg.lua",
"status": "fail",
"reason": "other: Unhandled exception: \\\"Unhandled exception: \\\\\\\"assertion failed!\\\\\\\"\\",
"ms": 2425
"ms": 2152
},
{
"name": "verybig.lua",
"status": "pass",
"reason": "",
"ms": 622
"ms": 562
}
]
}

View File

@@ -18,25 +18,25 @@ fail=12 timeout=3 skip=8 total=24
|---|---|---|---:|
| all.lua | skip | driver uses dofile to chain other tests | 0 |
| api.lua | skip | requires testC (C debug library) | 0 |
| attrib.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: module 'C' not found\\\"\ | 6342 |
| attrib.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: module 'C' not found\\\"\ | 6112 |
| big.lua | timeout | per-test timeout | 8008 |
| calls.lua | fail | undefined symbol: fat\ | 5134 |
| calls.lua | fail | undefined symbol: fat\ | 4800 |
| checktable.lua | skip | internal debug helpers | 0 |
| closure.lua | timeout | per-test timeout | 8007 |
| closure.lua | timeout | per-test timeout | 8008 |
| code.lua | skip | bytecode inspection via debug library | 0 |
| constructs.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: attempt to compare incompat | 7182 |
| constructs.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: attempt to compare incompat | 6618 |
| db.lua | skip | debug library | 0 |
| errors.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 3464 |
| events.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 7726 |
| errors.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 3080 |
| events.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 7192 |
| files.lua | skip | io library | 0 |
| gc.lua | skip | collectgarbage / finalisers | 0 |
| literals.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 1940 |
| locals.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: attempt to call non-functio | 1759 |
| literals.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 1777 |
| locals.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"lua: attempt to call non-functio | 1644 |
| main.lua | skip | standalone interpreter driver | 0 |
| math.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 4487 |
| nextvar.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 7441 |
| pm.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 6440 |
| sort.lua | timeout | per-test timeout | 8007 |
| strings.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 4243 |
| vararg.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 2425 |
| verybig.lua | pass | - | 622 |
| math.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 3999 |
| nextvar.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 6948 |
| pm.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 6326 |
| sort.lua | timeout | per-test timeout | 8008 |
| strings.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 3975 |
| vararg.lua | fail | other: Unhandled exception: \"Unhandled exception: \\\"assertion failed!\\\"\ | 2152 |
| verybig.lua | pass | - | 562 |