feat: initialize market app with browsing, product, and scraping code
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled

Split from coop monolith. Includes:
- Market/browse/product blueprints
- Product sync API
- Suma scraping pipeline
- Templates for market, browse, and product views
- Dockerfile and CI workflow for independent deployment
This commit is contained in:
giles
2026-02-09 23:16:34 +00:00
commit 6271a715a1
142 changed files with 8517 additions and 0 deletions

44
scrape/html_utils.py Normal file
View File

@@ -0,0 +1,44 @@
# scrape/html_utils.py
from __future__ import annotations
from typing import Optional
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from config import config
def to_fragment(html: Optional[str]) -> str:
    """Return the markup of *html* without any <html>/<body> wrapper tags.

    The input is parsed with BeautifulSoup's ``lxml`` parser, every
    ``<html>`` and ``<body>`` element is unwrapped, and the remaining
    top-level nodes are serialized back into a single stripped string.

    A falsy input (``None`` or ``""``) yields ``""``.
    """
    if not html:
        return ""

    document = BeautifulSoup(html, "lxml")

    # Remove the document-level containers themselves, not their content.
    for wrapper in document.find_all(["html", "body"]):
        wrapper.unwrap()

    serialized = "".join(map(str, document.contents))
    return serialized.strip()
def absolutize_fragment(html: Optional[str]) -> str:
    """Absolutize root-relative href/src values and return a bare fragment.

    The input is parsed with BeautifulSoup's ``lxml`` parser. For every tag
    carrying an ``href`` or ``src`` attribute, a value that starts with
    ``"/"`` is joined against ``config()["base_url"]`` using ``urljoin``;
    any other value is written back unchanged (as a ``str``). Finally the
    ``<html>``/``<body>`` wrappers are unwrapped and the remaining top-level
    nodes are serialized.

    Returns ``""`` for a falsy input (``None`` or ``""``).
    """
    if not html:
        return ""

    frag = BeautifulSoup(html, "lxml")

    # Looked up lazily so config() is only consulted when a URL actually
    # needs rewriting, and then at most once per call rather than once
    # per attribute.
    base_url = None

    for tag in frag.find_all(True):
        # href is handled before src, matching the original order.
        for attr in ("href", "src"):
            if not tag.has_attr(attr):
                continue
            raw = str(tag[attr])
            if raw.startswith("/"):
                if base_url is None:
                    base_url = config()["base_url"]
                raw = urljoin(base_url, raw)
            tag[attr] = raw

    # Unwrap document-level containers and return only the inner HTML.
    for wrapper in frag.find_all(["html", "body"]):
        wrapper.unwrap()
    return "".join(str(node) for node in frag.contents).strip()