lua: minimal Lua pattern engine for string.find (classes/anchors/quantifiers)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-24 23:05:48 +00:00
parent 3ec52d4556
commit bd0377b6a3
4 changed files with 210 additions and 44 deletions

View File

@@ -696,6 +696,163 @@
(dict-set! coroutine "wrap" lua-coroutine-wrap)
;; ── string library ────────────────────────────────────────────
;; ── Lua patterns (minimal) ────────────────────────────────────
;; Predicates for single-char classes
(define
lua-pat-class-match
(fn (class c)
(cond
((= class "d") (and (>= c "0") (<= c "9")))
((= class "D") (not (and (>= c "0") (<= c "9"))))
((= class "a") (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
((= class "A") (not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
((= class "s") (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
((= class "S") (not (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
((= class "w")
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9"))))
((= class "W")
(not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")))))
((= class "l") (and (>= c "a") (<= c "z")))
((= class "L") (not (and (>= c "a") (<= c "z"))))
((= class "u") (and (>= c "A") (<= c "Z")))
((= class "U") (not (and (>= c "A") (<= c "Z"))))
((= class "p") (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~"))))
((= class "P") (not (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~")))))
((= class "c") (or (< c " ") (= c "\127")))
((= class "C") (not (or (< c " ") (= c "\127"))))
((= class "x")
(or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F"))))
((= class "X")
(not (or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F")))))
(else (= c class)))))
;; Match a single "pattern atom" (char or class) at src position — returns true or false.
;; pat-pos is position of the atom start. Returns the atom's length in pat (1 or 2).
(define
lua-pat-atom-len
(fn (pat pat-pos)
(cond
((>= pat-pos (len pat)) 0)
((= (char-at pat pat-pos) "%") 2)
(else 1))))
(define
lua-pat-atom-match
(fn (pat pat-pos s s-pos)
(cond
((>= s-pos (len s)) false)
((>= pat-pos (len pat)) false)
(else
(let ((pc (char-at pat pat-pos)) (sc (char-at s s-pos)))
(cond
((= pc ".") true)
((= pc "%")
(cond
((>= (+ pat-pos 1) (len pat)) false)
(else (lua-pat-class-match (char-at pat (+ pat-pos 1)) sc))))
(else (= pc sc))))))))
;; Match pattern against string starting at s-pos and pat-pos. Returns end-pos (inclusive+1) or -1 on no-match.
(define
lua-pat-match
(fn (pat pat-pos s s-pos)
(cond
((>= pat-pos (len pat)) s-pos)
((and (= (char-at pat pat-pos) "$") (= (+ pat-pos 1) (len pat)))
(if (= s-pos (len s)) s-pos -1))
(else
(let
((alen (lua-pat-atom-len pat pat-pos)))
(let
((next-pat (+ pat-pos alen)))
(let
((qc (if (< next-pat (len pat)) (char-at pat next-pat) "")))
(cond
((= qc "*") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 0))
((= qc "+") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 1))
((= qc "-") (lua-pat-match-lazy pat pat-pos alen s s-pos (+ next-pat 1)))
((= qc "?")
(let ((tried (if (lua-pat-atom-match pat pat-pos s s-pos)
(lua-pat-match pat (+ next-pat 1) s (+ s-pos 1))
-1)))
(if (>= tried 0) tried (lua-pat-match pat (+ next-pat 1) s s-pos))))
(else
(cond
((lua-pat-atom-match pat pat-pos s s-pos)
(lua-pat-match pat next-pat s (+ s-pos 1)))
(else -1)))))))))))
(define
lua-pat-match-greedy
(fn (pat atom-pos atom-len s s-pos rest-pat-pos min-count)
(let ((count 0) (i s-pos))
(begin
(define
count-loop
(fn ()
(when (lua-pat-atom-match pat atom-pos s i)
(begin (set! i (+ i 1)) (set! count (+ count 1)) (count-loop)))))
(count-loop)
(let ((best -1))
(begin
(define
try-loop
(fn (k)
(when (and (< best 0) (>= k min-count))
(let ((r (lua-pat-match pat rest-pat-pos s (+ s-pos k))))
(cond
((>= r 0) (set! best r))
(else (try-loop (- k 1))))))))
(try-loop count)
best))))))
(define
lua-pat-match-lazy
(fn (pat atom-pos atom-len s s-pos rest-pat-pos)
(let ((best -1) (i s-pos))
(begin
(define
try-loop
(fn ()
(when (< best 0)
(let ((r (lua-pat-match pat rest-pat-pos s i)))
(cond
((>= r 0) (set! best r))
((and (< i (len s)) (lua-pat-atom-match pat atom-pos s i))
(begin (set! i (+ i 1)) (try-loop)))
(else (set! best -2)))))))
(try-loop)
(if (= best -2) -1 best)))))
;; Top-level find: return (start-index-0based . end-index) or nil on no match.
;; If pat starts with ^, anchor to init. Otherwise scan.
(define
lua-pat-find
(fn (pat s init)
(let
((anchored (and (> (len pat) 0) (= (char-at pat 0) "^"))))
(let
((start-pat (if anchored 1 0)))
(cond
(anchored
(let ((end (lua-pat-match pat start-pat s init)))
(if (>= end 0) (list init end) nil)))
(else
(let ((i init) (result nil))
(begin
(define
scan
(fn ()
(when (and (= result nil) (<= i (len s)))
(let ((end (lua-pat-match pat 0 s i)))
(cond
((>= end 0) (set! result (list i end)))
(else (begin (set! i (+ i 1)) (scan))))))))
(scan)
result))))))))
(define string {})
(define __ascii-32-126 " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")
@@ -784,20 +941,28 @@
(fn (&rest args)
(let ((s (first args))
(pat (nth args 1))
(init (if (> (len args) 2) (nth args 2) 1)))
(let ((start-i (cond
((< init 0) (+ (len s) init 1))
((= init 0) 1)
(else init))))
(let ((sub (if (<= start-i 1) s (substring s (- start-i 1) (len s)))))
(let ((idx (index-of sub pat)))
(cond
((< idx 0) nil)
(else
(list
(quote lua-multi)
(+ start-i idx)
(+ start-i idx (len pat) -1))))))))))
(init (if (> (len args) 2) (nth args 2) 1))
(plain (if (> (len args) 3) (nth args 3) false)))
(let ((start-i0 (cond
((< init 0) (let ((v (+ (len s) init))) (if (< v 0) 0 v)))
((= init 0) 0)
(else (- init 1)))))
(cond
((lua-truthy? plain)
(let ((sub (if (<= start-i0 0) s (substring s start-i0 (len s)))))
(let ((idx (index-of sub pat)))
(cond
((< idx 0) nil)
(else
(list
(quote lua-multi)
(+ start-i0 idx 1)
(+ start-i0 idx (len pat))))))))
(else
(let ((r (lua-pat-find pat s start-i0)))
(cond
((= r nil) nil)
(else (list (quote lua-multi) (+ (first r) 1) (nth r 1)))))))))))
;; Literal-only string.match: returns matched substring or nil (no captures since no pattern).
(define