;; lib/guest/lex.sx — character-class predicates and token primitives shared
;; across guest tokenisers.
;;
;; All predicates are nil-safe — they accept nil (end-of-input) and return
;; false. This matches the convention used by the existing per-language
;; tokenisers (cur returns nil at EOF).
;;
;; Char classes
;; ------------
;;   lex-digit?        — 0-9
;;   lex-hex-digit?    — 0-9, a-f, A-F
;;   lex-alpha?        — a-z, A-Z (alias: lex-letter?)
;;   lex-alnum?        — alpha or digit
;;   lex-ident-start?  — alpha or underscore
;;   lex-ident-char?   — ident-start or digit
;;   lex-space?        — " ", "\t", "\r" (no newline)
;;   lex-whitespace?   — " ", "\t", "\r", "\n" (includes newline)
;;
;; Token record
;; ------------
;;   (lex-make-token TYPE VALUE POS)  — {:type :value :pos}
;;   (lex-make-token-spanning TYPE VALUE POS END)
;;                                    — {:type :value :pos :end}
;;   (lex-token-type TOK)
;;   (lex-token-value TOK)
;;   (lex-token-pos TOK)

;; ---------------------------------------------------------------------------
;; Character classes
;; ---------------------------------------------------------------------------

;; Decimal digit test: ch must lie between "0" and "9" inclusive.
;; The nil guard comes first so the range comparisons never see nil.
(define lex-digit?
  (fn (ch)
    (and (not (= ch nil))
         (>= ch "0")
         (<= ch "9"))))

;; Hexadecimal digit test: a decimal digit, or a letter in a-f / A-F.
;; nil is rejected up front, before any range comparison runs.
(define lex-hex-digit?
  (fn (ch)
    (and (not (= ch nil))
         (or (lex-digit? ch)
             (and (>= ch "a") (<= ch "f"))
             (and (>= ch "A") (<= ch "F"))))))

;; Letter test: ch falls in a-z or A-Z. nil is rejected up front.
(define lex-alpha?
  (fn (ch)
    (and (not (= ch nil))
         (or (and (>= ch "a") (<= ch "z"))
             (and (>= ch "A") (<= ch "Z"))))))

;; Alias: lex-letter? is bound to the same function as lex-alpha?.
(define lex-letter? lex-alpha?)

;; Letter-or-digit test. No guard of its own; it delegates to lex-alpha?
;; and lex-digit?, both of which already reject nil.
(define lex-alnum?
  (fn (ch)
    (or (lex-alpha? ch)
        (lex-digit? ch))))

;; First character of an identifier: a letter or "_".
(define lex-ident-start?
  (fn (ch)
    (or (lex-alpha? ch)
        (= ch "_"))))

;; Subsequent character of an identifier: anything lex-ident-start?
;; accepts, plus decimal digits.
(define lex-ident-char?
  (fn (ch)
    (or (lex-ident-start? ch)
        (lex-digit? ch))))

;; Horizontal whitespace: space, tab or carriage return. Newline is
;; deliberately excluded. Only equality tests are used, so there is no
;; explicit nil guard here.
(define lex-space?
  (fn (ch)
    (or (= ch " ")
        (= ch "\t")
        (= ch "\r"))))

;; Any whitespace: everything lex-space? accepts, plus newline.
(define lex-whitespace?
  (fn (ch)
    (or (lex-space? ch)
        (= ch "\n"))))

;; ---------------------------------------------------------------------------
;; Token record
;; ---------------------------------------------------------------------------

;; Build a token dict carrying :type, :value and :pos.
(define lex-make-token
  (fn (type value pos)
    {:pos pos :value value :type type}))

;; Build a token dict that additionally records :end alongside :pos.
(define lex-make-token-spanning
  (fn (type value pos end)
    {:pos pos :end end :value value :type type}))

;; Accessor: the :type entry of a token dict.
(define lex-token-type
  (fn (token)
    (get token :type)))

;; Accessor: the :value entry of a token dict.
(define lex-token-value
  (fn (token)
    (get token :value)))

;; Accessor: the :pos entry of a token dict.
(define lex-token-pos
  (fn (token)
    (get token :pos)))