Monorepo: consolidate 7 repos into one
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
Combines shared, blog, market, cart, events, federation, and account into a single repository. Eliminates submodule sync, sibling model copying at build time, and per-app CI orchestration. Changes: - Remove per-app .git, .gitmodules, .gitea, submodule shared/ dirs - Remove stale sibling model copies from each app - Update all 6 Dockerfiles for monorepo build context (root = .) - Add build directives to docker-compose.yml - Add single .gitea/workflows/ci.yml with change detection - Add .dockerignore for monorepo build context - Create __init__.py for federation and account (cross-app imports)
This commit is contained in:
44
market/scrape/html_utils.py
Normal file
44
market/scrape/html_utils.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# market/scrape/html_utils.py
|
||||
from __future__ import annotations
|
||||
from typing import Optional
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
from shared.config import config
|
||||
|
||||
|
||||
|
||||
def to_fragment(html: Optional[str]) -> str:
    """Serialize *html* as a bare fragment, without document wrappers.

    The markup is parsed with BeautifulSoup's ``lxml`` parser, then every
    ``<html>`` and ``<body>`` element found in the tree is unwrapped so that
    only its children remain.

    Args:
        html: Markup to normalize. ``None`` and the empty string are accepted.

    Returns:
        The string form of each remaining top-level node, concatenated and
        stripped of surrounding whitespace; ``""`` when *html* is falsy.
    """
    if not html:
        return ""

    document = BeautifulSoup(html, "lxml")

    # Drop the document-level containers but keep whatever they contain.
    for wrapper in document.find_all(["html", "body"]):
        wrapper.unwrap()

    serialized = "".join(map(str, document.contents))
    return serialized.strip()
|
||||
|
||||
def absolutize_fragment(html: Optional[str]) -> str:
    """Make root-relative ``href``/``src`` values absolute; return a fragment.

    Every element in the parsed tree is visited. For each ``href`` and
    ``src`` attribute present, a value starting with ``"/"`` is joined onto
    the ``base_url`` entry of ``config()``; any other value is written back
    unchanged (as a ``str``). Finally the ``<html>`` and ``<body>`` elements
    are unwrapped so only the inner markup is returned.

    Args:
        html: Markup to process. ``None`` and the empty string are accepted.

    Returns:
        The rewritten markup as a stripped string with no ``<html>``/``<body>``
        wrappers; ``""`` when *html* is falsy.
    """
    if not html:
        return ""

    tree = BeautifulSoup(html, "lxml")

    # NOTE: a hook that would further rewrite hrefs through
    # ``rewrite_suma_href_to_local`` used to be sketched here; it is disabled,
    # so hrefs are only absolutized.
    for element in tree.find_all(True):
        for attr in ("href", "src"):
            if not element.has_attr(attr):
                continue
            value = str(element[attr])
            if value.startswith("/"):
                # Only root-relative values are resolved against the base URL.
                value = urljoin(config()["base_url"], value)
            element[attr] = value

    # Strip the document-level containers and keep just their contents.
    for wrapper in tree.find_all(["html", "body"]):
        wrapper.unwrap()

    serialized = "".join(map(str, tree.contents))
    return serialized.strip()
|
||||
Reference in New Issue
Block a user