Files
rose-ash/market/scrape/get_auth.py
giles f42042ccb7
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
Monorepo: consolidate 7 repos into one
Combines shared, blog, market, cart, events, federation, and account
into a single repository. Eliminates submodule sync, sibling model
copying at build time, and per-app CI orchestration.

Changes:
- Remove per-app .git, .gitmodules, .gitea, submodule shared/ dirs
- Remove stale sibling model copies from each app
- Update all 6 Dockerfiles for monorepo build context (root = .)
- Add build directives to docker-compose.yml
- Add single .gitea/workflows/ci.yml with change detection
- Add .dockerignore for monorepo build context
- Create __init__.py for federation and account (cross-app imports)
2026-02-24 19:44:17 +00:00

245 lines
9.3 KiB
Python

from typing import Optional, Dict, Any, List
from urllib.parse import urljoin
import httpx
from bs4 import BeautifulSoup
from shared.config import config
class LoginFailed(Exception):
    """Error raised when a login attempt is not confirmed as successful.

    The exception message is the human-readable reason; the keyword-only
    ``debug`` mapping is kept on the instance as ``self.debug`` so callers can
    inspect diagnostic details about the attempt.
    """

    def __init__(self, message: str, *, debug: Dict[str, Any]):
        # Attach the diagnostics first, then let Exception record the message.
        self.debug = debug
        super().__init__(message)
def _ff_headers(referer: Optional[str] = None, origin: Optional[str] = None) -> Dict[str, str]:
    """Return request headers that imitate Firefox 142 on Linux.

    ``Referer`` and ``Origin`` are appended (in that order) only when the
    corresponding argument is truthy.
    """
    headers: Dict[str, str] = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-GB,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "DNT": "1",
        "Sec-GPC": "1",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
    }
    # Optional, caller-supplied headers; skipped when None or empty.
    for header_name, header_value in (("Referer", referer), ("Origin", origin)):
        if header_value:
            headers[header_name] = header_value
    return headers
def _cookie_header_from_jar(jar: httpx.Cookies, domain: str, path: str = "/") -> str:
    """Build a ``Cookie`` request-header value from the cookies held in *jar*.

    Args:
        jar: Cookie container; its ``.jar`` attribute is iterated and each
            entry's ``name``, ``value``, ``domain`` and ``path`` are read.
        domain: Host the request is going to.
        path: Request path used for cookie path matching.

    Returns:
        ``"name=value"`` pairs joined with ``"; "`` in jar iteration order, or
        an empty string when no cookie qualifies.

    A cookie qualifies when it has a name and a non-None value, its domain
    (leading dots stripped) equals *domain* or either one is a dot-separated
    suffix of the other, and its path matches *path*.
    """
    pairs: List[str] = []
    for cookie in jar.jar:
        if not cookie.name or cookie.value is None:
            continue
        cookie_domain = (cookie.domain or "").lstrip(".")
        if not cookie_domain:
            continue
        # Domain matching is intentionally kept as it was: exact match, or a
        # parent/child relationship in either direction.
        related = (
            domain == cookie_domain
            or domain.endswith("." + cookie_domain)
            or cookie_domain.endswith("." + domain)
        )
        if not related:
            continue
        if not _cookie_path_matches(path, cookie.path or "/"):
            continue
        pairs.append(f"{cookie.name}={cookie.value}")
    return "; ".join(pairs)


def _cookie_path_matches(request_path: str, cookie_path: str) -> bool:
    """Return True when *request_path* path-matches *cookie_path* (RFC 6265 5.1.4).

    A bare prefix test is not enough: a cookie scoped to ``/cust`` must not be
    sent for ``/customer/``. The prefix has to end on a ``/`` boundary.
    """
    if request_path == cookie_path:
        return True
    if not request_path.startswith(cookie_path):
        return False
    # Here request_path is strictly longer than cookie_path, so the index is valid.
    return cookie_path.endswith("/") or request_path[len(cookie_path)] == "/"
def _extract_magento_errors(html_text: str) -> list[str]:
    """Collect the distinct message texts shown in a page's message boxes.

    The HTML is parsed with BeautifulSoup (lxml) and every element matching
    one of the error / warning / notice selectors contributes its
    whitespace-collapsed text. Texts are returned in first-seen order without
    duplicates. Parsing is best-effort: on any exception whatever was gathered
    so far (possibly nothing) is returned.
    """
    selectors = (
        ".message-error",
        ".messages .message-error",
        ".page.messages .message-error",
        "[data-ui-id='message-error']",
        ".message.warning",
        ".message.notice",
    )
    # Insertion-ordered dict doubles as an ordered set for de-duplication.
    collected: dict[str, None] = {}
    try:
        document = BeautifulSoup(html_text or "", "lxml")
        for selector in selectors:
            for node in document.select(selector):
                text = " ".join((node.get_text(" ") or "").split())
                if text:
                    collected.setdefault(text, None)
    except Exception:
        # Deliberately tolerant: a malformed page must not break the caller.
        pass
    return list(collected)
def _looks_like_login_page(html_text: str) -> bool:
    """Heuristically decide whether *html_text* is the customer login page.

    Returns True when the document contains ``form#login-form.form-login`` or
    when its ``<title>`` (trimmed, lower-cased) contains "customer login".
    Any parsing error is treated as "not a login page".
    """
    try:
        page = BeautifulSoup(html_text or "", "lxml")
        if not page.select_one("form#login-form.form-login"):
            # No login form present; fall back to inspecting the page title.
            heading = page.title
            title_text = heading.get_text() if heading else ""
            return "customer login" in title_text.strip().lower()
        return True
    except Exception:
        return False
def _chrome_headers(referer=None, origin=None):
    """Return request headers that imitate Chrome 119 on Windows.

    ``Referer`` and ``Origin`` are added (in that order) only when the
    corresponding argument is truthy.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    )
    result = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    # Merge in only the optional headers the caller actually supplied.
    optional = {"Referer": referer, "Origin": origin}
    result.update({key: value for key, value in optional.items() if value})
    return result
async def login(
    username: str,
    password: str,
    *,
    extra_cookies: Optional[Dict[str, str]] = None,  # ok to pass cf_clearance etc., but NOT form_key
    timeout: float = 30.0,
) -> httpx.Cookies:
    """
    Attempt login and return an authenticated cookie jar.

    Success criteria (strict):
      1) /customer/section/load?sections=customer reports is_logged_in == True
      OR
      2) GET /customer/account/ resolves to an account page (not the login page).
    Otherwise raises LoginFailed with debug info.

    Args:
        username: Value submitted as ``login[username]``.
        password: Value submitted as ``login[password]``.
        extra_cookies: Optional name -> value cookies to seed the session with.
            Any key equal to ``form_key`` (case-insensitive) is ignored, and the
            built-in consent cookies override same-named entries.
        timeout: Overall httpx timeout in seconds (connect timeout is 15s).

    Returns:
        The client's cookie jar after a confirmed login, with every
        ``form_key`` cookie removed.

    Raises:
        LoginFailed: The login form was not found, or neither success
            criterion was met.
        httpx.HTTPStatusError: The initial GET of the login page returned an
            error status (via ``raise_for_status``).
    """
    import logging

    log = logging.getLogger(__name__)
    cookie_domain = "wholesale.suma.coop"

    limits = httpx.Limits(max_connections=10, max_keepalive_connections=6)
    cookies = httpx.Cookies()
    seed_cookies = {
        **(extra_cookies or {}),
        "pr-cookie-consent": '["all"]',
        "user_allowed_save_cookie": '{"1":1}',
    }
    for k, v in seed_cookies.items():
        # form_key must come from the freshly fetched login form, never the caller.
        if k.lower() == "form_key":
            continue
        cookies.set(k, v, domain=cookie_domain, path="/")

    base_login = config()["base_login"]
    base_url = config()["base_url"]

    async with httpx.AsyncClient(
        follow_redirects=True,
        timeout=httpx.Timeout(timeout, connect=15.0),
        http2=True,
        limits=limits,
        cookies=cookies,
        headers=_chrome_headers(),
        trust_env=True,
    ) as client:
        # 1) GET login page for fresh form_key
        r_get = await client.get(base_login, headers=_chrome_headers())
        if not 200 <= r_get.status_code < 300:
            # Only report a failure when there is one; body is trimmed.
            log.warning(
                "Login GET returned status %s for %s; body starts: %r",
                r_get.status_code,
                r_get.url,
                r_get.text[:1000],
            )
        r_get.raise_for_status()

        soup = BeautifulSoup(r_get.text, "lxml")
        form = soup.select_one("form.form.form-login#login-form") or soup.select_one("#login-form")
        if not form:
            raise LoginFailed(
                "Login form not found (possible bot challenge or theme change).",
                debug={"get_status": r_get.status_code, "final_url": str(r_get.url)},
            )

        action = urljoin(base_login, form.get("action") or base_login)
        fk_el = form.find("input", attrs={"name": "form_key"})
        hidden_form_key = (fk_el.get("value") if fk_el else "") or ""

        # mirror Magento behavior: form_key also appears as a cookie
        client.cookies.set("form_key", hidden_form_key, domain=cookie_domain, path="/")

        payload = {
            "form_key": hidden_form_key,
            "login[username]": username,
            "login[password]": password,
            "send": "Login",
        }
        post_headers = _chrome_headers(referer=base_login, origin=base_url)
        post_headers["Content-Type"] = "application/x-www-form-urlencoded"
        # Explicit Cookie header built from the jar for the /customer/ path.
        post_headers["Cookie"] = _cookie_header_from_jar(
            client.cookies, domain=cookie_domain, path="/customer/"
        )
        r_post = await client.post(action, data=payload, headers=post_headers)

        # 2) Primary check: sections API must say logged in
        is_logged_in = False
        sections_url = "https://wholesale.suma.coop/customer/section/load/?sections=customer&force_new_section_timestamp=1"
        section_json: Dict[str, Any] = {}
        try:
            r_sec = await client.get(sections_url, headers=_chrome_headers(referer=base_login))
            if r_sec.status_code == 200:
                parsed = r_sec.json()
                # Keep section_json a dict so the failure report below can
                # always call .get() on it, whatever the endpoint returned.
                if isinstance(parsed, dict):
                    section_json = parsed
                    cust = section_json.get("customer") or {}
                    is_logged_in = bool(cust.get("is_logged_in"))
        except Exception:
            # Best-effort probe: fall through to the account-page check.
            log.debug("Customer sections check failed", exc_info=True)

        # 3) Secondary check: account page should NOT be the login page
        looks_like_login = False
        final_account_url = ""
        try:
            r_acc = await client.get(
                "https://wholesale.suma.coop/customer/account/",
                headers=_chrome_headers(referer=base_login),
            )
            final_account_url = str(r_acc.url)
            looks_like_login = (
                "/customer/account/login" in final_account_url
                or _looks_like_login_page(r_acc.text)
            )
        except Exception:
            # ignore; we'll rely on section status
            log.debug("Account page check failed", exc_info=True)

        # Decide success/failure strictly
        if not (is_logged_in or (final_account_url and not looks_like_login)):
            errors = _extract_magento_errors(r_post.text)
            # Clean up transient form_key cookie
            client.cookies.delete("form_key")
            raise LoginFailed(
                errors[0] if errors else "Invalid username or password.",
                debug={
                    "get_status": r_get.status_code,
                    "post_status": r_post.status_code,
                    "post_final_url": str(r_post.url),
                    "sections_customer": section_json.get("customer"),
                    "account_final_url": final_account_url,
                    "looks_like_login_page": looks_like_login,
                },
            )

        # Remove form_key for every domain/path so the returned jar holds only
        # session cookies. Cookies.delete(name) clears all cookies with that name.
        client.cookies.delete("form_key")

        # Log cookie names only; the values are session credentials.
        log.debug(
            "Login succeeded; cookies in jar: %s",
            sorted({c.name for c in client.cookies.jar}),
        )
        return client.cookies