Phase 1-3 of decoupling plan: - Shared DB, models, infrastructure, browser, config, utils - Event infrastructure (domain_events outbox, bus, processor) - Structured logging - Generic container concept (container_type/container_id) - Alembic migrations for all schema changes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
97 lines
2.7 KiB
Python
97 lines
2.7 KiB
Python
import re
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
import os
|
|
from typing import Iterable, Union, List
|
|
|
|
from quart import request
|
|
|
|
def soup_of(html: str) -> BeautifulSoup:
    """Parse *html* into a ``BeautifulSoup`` tree using the ``lxml`` parser."""
    parser_name = "lxml"
    return BeautifulSoup(html, parser_name)
|
|
|
|
def normalize_text(s: str) -> str:
    """Return *s* trimmed, with every whitespace run collapsed to one space.

    A falsy *s* (``None`` or ``""``) yields the empty string.
    """
    trimmed = (s or "").strip()
    return re.sub(r"\s+", " ", trimmed)
|
|
|
|
def log(msg: str) -> None:
    """Print *msg* followed by a newline and flush the stream right away."""
    print(msg, end="\n", flush=True)
|
|
|
|
def ensure_dir(path: str) -> None:
    """Create directory *path* together with any missing parents.

    An already existing directory is not an error (``exist_ok=True``).
    """
    os.makedirs(path, exist_ok=True)
|
|
|
|
def dump_json(path: str, data) -> None:
    """Write *data* to *path* as indented UTF-8 JSON.

    The parent directory of *path* is created first when it is missing.
    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    output is indented by two spaces.

    Args:
        path: Destination file; it is opened in ``"w"`` mode, so an existing
            file is overwritten.
        data: Any object ``json.dump`` can serialise.
    """
    parent = os.path.dirname(path)
    # A bare filename such as "out.json" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError -- the current directory
    # already exists, so there is nothing to create in that case.
    if parent:
        ensure_dir(parent)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
|
|
|
def _join_url_parts(parts: List[str]) -> str:
|
|
parts = [p for p in parts if p is not None and p != ""]
|
|
if not parts:
|
|
return ""
|
|
|
|
# Preserve scheme like "https://"
|
|
m = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]*://)(.*)$", parts[0])
|
|
if m:
|
|
scheme, first = m.group(1), m.group(2)
|
|
else:
|
|
scheme, first = "", parts[0]
|
|
|
|
cleaned = [first.strip("/")]
|
|
for seg in parts[1:]:
|
|
seg = str(seg)
|
|
if seg.startswith("?") or seg.startswith("#"):
|
|
cleaned[-1] = cleaned[-1] + seg # attach query/fragment
|
|
else:
|
|
cleaned.append(seg.strip("/"))
|
|
|
|
url = scheme + "/".join(s for s in cleaned if s != "")
|
|
|
|
# Preserve trailing slash if caller's last segment had one (and isn't ? or #)
|
|
last = str(parts[-1])
|
|
if last.endswith("/") and not last.startswith(("?", "#")) and not url.endswith("/"):
|
|
url += "/"
|
|
|
|
return url
|
|
|
|
def hx_fragment_request() -> bool:
    """Report whether the current request has an ``HX-Request: true`` header.

    The header value is compared case-insensitively; a missing header is
    treated as an empty string and therefore yields ``False``.
    """
    header_value = request.headers.get("HX-Request", "")
    return header_value.lower() == "true"
|
|
def route_prefix():
    """Return the absolute base URL of the current request.

    The result is ``<scheme>://<host>/<prefix>`` where the prefix comes from
    the ``x-forwarded-prefix`` request header (``/`` when the header is
    absent).

    Leading slashes are removed from the prefix before it is appended,
    because the template already supplies the slash after the host.
    Without that, a prefix of ``/app`` would produce ``http://host//app``
    and the default would produce ``http://host//``.
    """
    prefix = request.headers.get('x-forwarded-prefix', '/')
    return f"{request.scheme}://{request.host}/{prefix.lstrip('/')}"
|
|
|
|
def join_url(value: Union[str, Iterable[str]]):
    """Join one string or an iterable of URL pieces into a single URL.

    A plain string is treated as a one-element list; any other iterable is
    materialised with ``list()``. The actual joining is delegated to
    ``_join_url_parts``.
    """
    pieces = [value] if isinstance(value, str) else list(value)
    return _join_url_parts(pieces)
|
|
|
|
def host_url(value: str='', no_slash=False):
    """
    Join ``route_prefix()`` with *value* and, unless *no_slash* is set, make
    sure the path part ends with a slash. The slash goes before any
    ``?query`` or ``#fragment``, never after it.

    With ``no_slash=True`` the joined URL is returned untouched.

    Examples of the trailing-slash rule:
        http://jjj          -> http://jjj/
        http://jjj?hello    -> http://jjj/?hello
        /foo                -> /foo/
        /foo?x=1#frag       -> /foo/?x=1#frag
    """
    joined = join_url([route_prefix(), value])
    if no_slash:
        return joined

    # Split into: base (no ?/#), optional ?query, optional #fragment
    pieces = re.match(r'^(?P<base>[^?#]*)(?P<qs>\?[^#]*)?(?P<frag>#.*)?$', joined)
    if pieces is None:
        return joined  # fallback: return as-is

    base, qs, frag = (pieces.group(key) or "" for key in ("base", "qs", "frag"))

    # Ensure trailing slash on the PATH (before ? or #)
    if base and not base.endswith('/'):
        base = base + '/'

    return base + qs + frag
|