Monorepo: consolidate 7 repos into one
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m5s
Combines shared, blog, market, cart, events, federation, and account into a single repository. Eliminates submodule sync, sibling model copying at build time, and per-app CI orchestration. Changes: - Remove per-app .git, .gitmodules, .gitea, submodule shared/ dirs - Remove stale sibling model copies from each app - Update all 6 Dockerfiles for monorepo build context (root = .) - Add build directives to docker-compose.yml - Add single .gitea/workflows/ci.yml with change detection - Add .dockerignore for monorepo build context - Create __init__.py for federation and account (cross-app imports)
This commit is contained in:
244
market/scrape/get_auth.py
Normal file
244
market/scrape/get_auth.py
Normal file
@@ -0,0 +1,244 @@
|
||||
from typing import Optional, Dict, Any, List
|
||||
from urllib.parse import urljoin
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
from shared.config import config
|
||||
|
||||
class LoginFailed(Exception):
    """Raised when a login attempt cannot be confirmed as successful.

    Carries a ``debug`` mapping (statuses, final URLs, section payloads)
    so callers can diagnose why the login was rejected.
    """

    def __init__(self, message: str, *, debug: Dict[str, Any]):
        self.debug = debug
        super().__init__(message)
|
||||
|
||||
def _ff_headers(referer: Optional[str] = None, origin: Optional[str] = None) -> Dict[str, str]:
|
||||
h = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-GB,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"DNT": "1",
|
||||
"Sec-GPC": "1",
|
||||
"Cache-Control": "no-cache",
|
||||
"Pragma": "no-cache",
|
||||
}
|
||||
if referer:
|
||||
h["Referer"] = referer
|
||||
if origin:
|
||||
h["Origin"] = origin
|
||||
return h
|
||||
|
||||
def _cookie_header_from_jar(jar: httpx.Cookies, domain: str, path: str = "/") -> str:
|
||||
pairs: List[str] = []
|
||||
for c in jar.jar:
|
||||
if not c.name or c.value is None:
|
||||
continue
|
||||
dom = (c.domain or "").lstrip(".")
|
||||
if not dom:
|
||||
continue
|
||||
if not (domain == dom or domain.endswith("." + dom) or dom.endswith("." + domain)):
|
||||
continue
|
||||
if not (path.startswith(c.path or "/")):
|
||||
continue
|
||||
pairs.append(f"{c.name}={c.value}")
|
||||
return "; ".join(pairs)
|
||||
|
||||
def _extract_magento_errors(html_text: str) -> list[str]:
    """Pull user-visible Magento error/warning/notice messages out of HTML.

    Returns de-duplicated, whitespace-normalised message texts in selector
    order. Best-effort: any parsing failure yields an empty list instead of
    raising.
    """
    selectors = (
        ".message-error",
        ".messages .message-error",
        ".page.messages .message-error",
        "[data-ui-id='message-error']",
        ".message.warning",
        ".message.notice",
    )
    found: list[str] = []
    try:
        doc = BeautifulSoup(html_text or "", "lxml")
        for selector in selectors:
            for node in doc.select(selector):
                text = " ".join((node.get_text(" ") or "").split())
                if text and text not in found:
                    found.append(text)
    except Exception:
        pass  # best-effort extraction; an empty result is acceptable
    return found
|
||||
|
||||
def _looks_like_login_page(html_text: str) -> bool:
    """Heuristically decide whether *html_text* is the customer login page.

    Checks for the Magento login form element first, then falls back to the
    page title containing "customer login". Parsing failures count as
    "not a login page".
    """
    try:
        doc = BeautifulSoup(html_text or "", "lxml")
        if doc.select_one("form#login-form.form-login"):
            return True
        page_title = (doc.title.get_text() if doc.title else "").strip().lower()
        return "customer login" in page_title
    except Exception:
        return False
|
||||
|
||||
def _chrome_headers(referer=None, origin=None):
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
}
|
||||
if referer:
|
||||
headers["Referer"] = referer
|
||||
if origin:
|
||||
headers["Origin"] = origin
|
||||
return headers
|
||||
|
||||
async def login(
    username: str,
    password: str,
    *,
    extra_cookies: Optional[Dict[str, str]] = None,  # ok to pass cf_clearance etc., but NOT form_key
    timeout: float = 30.0,
) -> httpx.Cookies:
    """
    Attempt a Magento storefront login and return an authenticated cookie jar.

    Success criteria (strict):
      1) /customer/section/load?sections=customer reports is_logged_in == True
         OR
      2) GET /customer/account/ resolves to an account page (not the login page).

    Args:
        username: Customer account username/e-mail.
        password: Customer account password.
        extra_cookies: Optional cookies to seed the jar (e.g. cf_clearance).
            Any "form_key" entry is ignored; a fresh one is always taken from
            the login page. Defaults to no extra cookies.
        timeout: Overall request timeout in seconds (connect capped at 15s).

    Returns:
        The authenticated ``httpx.Cookies`` jar, with the transient form_key
        cookie removed.

    Raises:
        LoginFailed: when neither success criterion is met; carries debug info.
        httpx.HTTPStatusError: if the initial GET of the login page fails.
    """
    limits = httpx.Limits(max_connections=10, max_keepalive_connections=6)
    cookies = httpx.Cookies()
    # Seed consent cookies plus any caller-supplied extras. Never accept a
    # caller-supplied form_key: Magento requires the one issued with the page.
    seed = {
        **(extra_cookies or {}),
        "pr-cookie-consent": '["all"]',
        "user_allowed_save_cookie": '{"1":1}',
    }
    for k, v in seed.items():
        if k.lower() == "form_key":
            continue
        cookies.set(k, v, domain="wholesale.suma.coop", path="/")

    base_login = config()["base_login"]
    base_url = config()["base_url"]

    async with httpx.AsyncClient(
        follow_redirects=True,
        timeout=httpx.Timeout(timeout, connect=15.0),
        http2=True,
        limits=limits,
        cookies=cookies,
        headers=_chrome_headers(),
        trust_env=True,
    ) as client:
        # 1) GET login page for a fresh form_key.
        r_get = await client.get(base_login, headers=_chrome_headers())
        r_get.raise_for_status()
        soup = BeautifulSoup(r_get.text, "lxml")

        form = soup.select_one("form.form.form-login#login-form") or soup.select_one("#login-form")
        if not form:
            raise LoginFailed(
                "Login form not found (possible bot challenge or theme change).",
                debug={"get_status": r_get.status_code, "final_url": str(r_get.url)},
            )
        action = urljoin(base_login, form.get("action") or base_login)
        fk_el = form.find("input", attrs={"name": "form_key"})
        hidden_form_key = (fk_el.get("value") if fk_el else "") or ""

        # Mirror Magento behavior: form_key also appears as a cookie.
        client.cookies.set("form_key", hidden_form_key, domain="wholesale.suma.coop", path="/")

        payload = {
            "form_key": hidden_form_key,
            "login[username]": username,
            "login[password]": password,
            "send": "Login",
        }

        post_headers = _chrome_headers(referer=base_login, origin=base_url)
        post_headers["Content-Type"] = "application/x-www-form-urlencoded"
        post_headers["Cookie"] = _cookie_header_from_jar(
            client.cookies, domain="wholesale.suma.coop", path="/customer/"
        )

        r_post = await client.post(action, data=payload, headers=post_headers)

        # 2) Primary check: sections API must say logged in.
        is_logged_in = False
        sections_url = "https://wholesale.suma.coop/customer/section/load/?sections=customer&force_new_section_timestamp=1"
        section_json: Dict[str, Any] = {}
        try:
            r_sec = await client.get(sections_url, headers=_chrome_headers(referer=base_login))
            if r_sec.status_code == 200:
                section_json = r_sec.json()
                cust = section_json.get("customer") or {}
                is_logged_in = bool(cust.get("is_logged_in"))
        except Exception:
            pass  # best-effort; the account-page check below can still succeed

        # 3) Secondary check: account page should NOT be the login page.
        looks_like_login = False
        final_account_url = ""
        try:
            r_acc = await client.get(
                "https://wholesale.suma.coop/customer/account/",
                headers=_chrome_headers(referer=base_login),
            )
            final_account_url = str(r_acc.url)
            looks_like_login = (
                "/customer/account/login" in final_account_url
                or _looks_like_login_page(r_acc.text)
            )
        except Exception:
            # ignore; we'll rely on section status
            pass

        def _clear_cookie_everywhere(jar_cookies: httpx.Cookies, name: str) -> None:
            """Remove every cookie called *name*, whatever domain/path variant it has."""
            to_delete = [
                (c.domain, c.path, c.name)
                for c in list(jar_cookies.jar)  # http.cookiejar.Cookie objects
                if c.name == name
            ]
            for domain, path, nm in to_delete:
                try:
                    # CookieJar.clear requires the exact (domain, path, name) triple.
                    jar_cookies.jar.clear(domain, path, nm)
                except KeyError:
                    # Leading-dot vs bare-domain mismatch; retry the other variant.
                    if domain and domain.startswith("."):
                        jar_cookies.jar.clear(domain.lstrip("."), path, nm)
                    else:
                        jar_cookies.jar.clear("." + domain, path, nm)
            if name in jar_cookies:
                del jar_cookies[name]

        # Decide success/failure strictly.
        if not (is_logged_in or (final_account_url and not looks_like_login)):
            errors = _extract_magento_errors(r_post.text)
            # Clean up the transient form_key cookie before surfacing the failure.
            try:
                _clear_cookie_everywhere(client.cookies, "form_key")
            except Exception:
                pass
            raise LoginFailed(
                errors[0] if errors else "Invalid username or password.",
                debug={
                    "get_status": r_get.status_code,
                    "post_status": r_post.status_code,
                    "post_final_url": str(r_post.url),
                    "sections_customer": section_json.get("customer"),
                    "account_final_url": final_account_url,
                    "looks_like_login_page": looks_like_login,
                },
            )

        # Success: drop the transient form_key before handing the jar back.
        _clear_cookie_everywhere(client.cookies, "form_key")
        return client.cookies
|
||||
Reference in New Issue
Block a user