lua: minimal Lua pattern engine for string.find (classes/anchors/quantifiers)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
This commit is contained in:
@@ -696,6 +696,163 @@
|
||||
(dict-set! coroutine "wrap" lua-coroutine-wrap)
|
||||
|
||||
;; ── string library ────────────────────────────────────────────
|
||||
|
||||
|
||||
;; ── Lua patterns (minimal) ────────────────────────────────────
|
||||
;; Predicates for single-char classes
|
||||
(define
|
||||
lua-pat-class-match
|
||||
(fn (class c)
|
||||
(cond
|
||||
((= class "d") (and (>= c "0") (<= c "9")))
|
||||
((= class "D") (not (and (>= c "0") (<= c "9"))))
|
||||
((= class "a") (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))
|
||||
((= class "A") (not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
|
||||
((= class "s") (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))
|
||||
((= class "S") (not (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||
((= class "w")
|
||||
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9"))))
|
||||
((= class "W")
|
||||
(not (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")) (and (>= c "0") (<= c "9")))))
|
||||
((= class "l") (and (>= c "a") (<= c "z")))
|
||||
((= class "L") (not (and (>= c "a") (<= c "z"))))
|
||||
((= class "u") (and (>= c "A") (<= c "Z")))
|
||||
((= class "U") (not (and (>= c "A") (<= c "Z"))))
|
||||
((= class "p") (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~"))))
|
||||
((= class "P") (not (or (and (>= c "!") (<= c "/")) (and (>= c ":") (<= c "@")) (and (>= c "[") (<= c "`")) (and (>= c "{") (<= c "~")))))
|
||||
((= class "c") (or (< c " ") (= c "\127")))
|
||||
((= class "C") (not (or (< c " ") (= c "\127"))))
|
||||
((= class "x")
|
||||
(or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F"))))
|
||||
((= class "X")
|
||||
(not (or (and (>= c "0") (<= c "9")) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F")))))
|
||||
(else (= c class)))))
|
||||
|
||||
;; Match a single "pattern atom" (char or class) at src position — returns true or false.
|
||||
;; pat-pos is position of the atom start. Returns the atom's length in pat (1 or 2).
|
||||
(define
|
||||
lua-pat-atom-len
|
||||
(fn (pat pat-pos)
|
||||
(cond
|
||||
((>= pat-pos (len pat)) 0)
|
||||
((= (char-at pat pat-pos) "%") 2)
|
||||
(else 1))))
|
||||
|
||||
(define
|
||||
lua-pat-atom-match
|
||||
(fn (pat pat-pos s s-pos)
|
||||
(cond
|
||||
((>= s-pos (len s)) false)
|
||||
((>= pat-pos (len pat)) false)
|
||||
(else
|
||||
(let ((pc (char-at pat pat-pos)) (sc (char-at s s-pos)))
|
||||
(cond
|
||||
((= pc ".") true)
|
||||
((= pc "%")
|
||||
(cond
|
||||
((>= (+ pat-pos 1) (len pat)) false)
|
||||
(else (lua-pat-class-match (char-at pat (+ pat-pos 1)) sc))))
|
||||
(else (= pc sc))))))))
|
||||
|
||||
;; Match pattern against string starting at s-pos and pat-pos. Returns end-pos (inclusive+1) or -1 on no-match.
|
||||
(define
|
||||
lua-pat-match
|
||||
(fn (pat pat-pos s s-pos)
|
||||
(cond
|
||||
((>= pat-pos (len pat)) s-pos)
|
||||
((and (= (char-at pat pat-pos) "$") (= (+ pat-pos 1) (len pat)))
|
||||
(if (= s-pos (len s)) s-pos -1))
|
||||
(else
|
||||
(let
|
||||
((alen (lua-pat-atom-len pat pat-pos)))
|
||||
(let
|
||||
((next-pat (+ pat-pos alen)))
|
||||
(let
|
||||
((qc (if (< next-pat (len pat)) (char-at pat next-pat) "")))
|
||||
(cond
|
||||
((= qc "*") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 0))
|
||||
((= qc "+") (lua-pat-match-greedy pat pat-pos alen s s-pos (+ next-pat 1) 1))
|
||||
((= qc "-") (lua-pat-match-lazy pat pat-pos alen s s-pos (+ next-pat 1)))
|
||||
((= qc "?")
|
||||
(let ((tried (if (lua-pat-atom-match pat pat-pos s s-pos)
|
||||
(lua-pat-match pat (+ next-pat 1) s (+ s-pos 1))
|
||||
-1)))
|
||||
(if (>= tried 0) tried (lua-pat-match pat (+ next-pat 1) s s-pos))))
|
||||
(else
|
||||
(cond
|
||||
((lua-pat-atom-match pat pat-pos s s-pos)
|
||||
(lua-pat-match pat next-pat s (+ s-pos 1)))
|
||||
(else -1)))))))))))
|
||||
|
||||
(define
|
||||
lua-pat-match-greedy
|
||||
(fn (pat atom-pos atom-len s s-pos rest-pat-pos min-count)
|
||||
(let ((count 0) (i s-pos))
|
||||
(begin
|
||||
(define
|
||||
count-loop
|
||||
(fn ()
|
||||
(when (lua-pat-atom-match pat atom-pos s i)
|
||||
(begin (set! i (+ i 1)) (set! count (+ count 1)) (count-loop)))))
|
||||
(count-loop)
|
||||
(let ((best -1))
|
||||
(begin
|
||||
(define
|
||||
try-loop
|
||||
(fn (k)
|
||||
(when (and (< best 0) (>= k min-count))
|
||||
(let ((r (lua-pat-match pat rest-pat-pos s (+ s-pos k))))
|
||||
(cond
|
||||
((>= r 0) (set! best r))
|
||||
(else (try-loop (- k 1))))))))
|
||||
(try-loop count)
|
||||
best))))))
|
||||
|
||||
(define
|
||||
lua-pat-match-lazy
|
||||
(fn (pat atom-pos atom-len s s-pos rest-pat-pos)
|
||||
(let ((best -1) (i s-pos))
|
||||
(begin
|
||||
(define
|
||||
try-loop
|
||||
(fn ()
|
||||
(when (< best 0)
|
||||
(let ((r (lua-pat-match pat rest-pat-pos s i)))
|
||||
(cond
|
||||
((>= r 0) (set! best r))
|
||||
((and (< i (len s)) (lua-pat-atom-match pat atom-pos s i))
|
||||
(begin (set! i (+ i 1)) (try-loop)))
|
||||
(else (set! best -2)))))))
|
||||
(try-loop)
|
||||
(if (= best -2) -1 best)))))
|
||||
|
||||
;; Top-level find: return (start-index-0based . end-index) or nil on no match.
|
||||
;; If pat starts with ^, anchor to init. Otherwise scan.
|
||||
(define
|
||||
lua-pat-find
|
||||
(fn (pat s init)
|
||||
(let
|
||||
((anchored (and (> (len pat) 0) (= (char-at pat 0) "^"))))
|
||||
(let
|
||||
((start-pat (if anchored 1 0)))
|
||||
(cond
|
||||
(anchored
|
||||
(let ((end (lua-pat-match pat start-pat s init)))
|
||||
(if (>= end 0) (list init end) nil)))
|
||||
(else
|
||||
(let ((i init) (result nil))
|
||||
(begin
|
||||
(define
|
||||
scan
|
||||
(fn ()
|
||||
(when (and (= result nil) (<= i (len s)))
|
||||
(let ((end (lua-pat-match pat 0 s i)))
|
||||
(cond
|
||||
((>= end 0) (set! result (list i end)))
|
||||
(else (begin (set! i (+ i 1)) (scan))))))))
|
||||
(scan)
|
||||
result))))))))
|
||||
|
||||
(define string {})
|
||||
|
||||
(define __ascii-32-126 " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")
|
||||
@@ -784,20 +941,28 @@
|
||||
(fn (&rest args)
|
||||
(let ((s (first args))
|
||||
(pat (nth args 1))
|
||||
(init (if (> (len args) 2) (nth args 2) 1)))
|
||||
(let ((start-i (cond
|
||||
((< init 0) (+ (len s) init 1))
|
||||
((= init 0) 1)
|
||||
(else init))))
|
||||
(let ((sub (if (<= start-i 1) s (substring s (- start-i 1) (len s)))))
|
||||
(let ((idx (index-of sub pat)))
|
||||
(cond
|
||||
((< idx 0) nil)
|
||||
(else
|
||||
(list
|
||||
(quote lua-multi)
|
||||
(+ start-i idx)
|
||||
(+ start-i idx (len pat) -1))))))))))
|
||||
(init (if (> (len args) 2) (nth args 2) 1))
|
||||
(plain (if (> (len args) 3) (nth args 3) false)))
|
||||
(let ((start-i0 (cond
|
||||
((< init 0) (let ((v (+ (len s) init))) (if (< v 0) 0 v)))
|
||||
((= init 0) 0)
|
||||
(else (- init 1)))))
|
||||
(cond
|
||||
((lua-truthy? plain)
|
||||
(let ((sub (if (<= start-i0 0) s (substring s start-i0 (len s)))))
|
||||
(let ((idx (index-of sub pat)))
|
||||
(cond
|
||||
((< idx 0) nil)
|
||||
(else
|
||||
(list
|
||||
(quote lua-multi)
|
||||
(+ start-i0 idx 1)
|
||||
(+ start-i0 idx (len pat))))))))
|
||||
(else
|
||||
(let ((r (lua-pat-find pat s start-i0)))
|
||||
(cond
|
||||
((= r nil) nil)
|
||||
(else (list (quote lua-multi) (+ (first r) 1) (nth r 1)))))))))))
|
||||
|
||||
;; Literal-only string.match: returns matched substring or nil (no captures since no pattern).
|
||||
(define
|
||||
|
||||
Reference in New Issue
Block a user