"""Magento storefront login helper.

Obtains a fresh ``form_key`` from the login page, posts credentials, and
strictly verifies the session before returning the authenticated cookie jar.
"""

from typing import Optional, Dict, Any, List
from urllib.parse import urljoin

import httpx
from bs4 import BeautifulSoup

from config import config


class LoginFailed(Exception):
    """Login could not be verified.

    ``debug`` carries structured request/response details (statuses, final
    URLs, Magento section payload) for diagnosis.
    """

    def __init__(self, message: str, *, debug: Dict[str, Any]):
        super().__init__(message)
        self.debug = debug


def _ff_headers(referer: Optional[str] = None, origin: Optional[str] = None) -> Dict[str, str]:
    """Firefox-like request headers; ``Referer``/``Origin`` added only when given."""
    h = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-GB,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "DNT": "1",
        "Sec-GPC": "1",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
    }
    if referer:
        h["Referer"] = referer
    if origin:
        h["Origin"] = origin
    return h


def _cookie_header_from_jar(jar: httpx.Cookies, domain: str, path: str = "/") -> str:
    """Build a ``Cookie:`` header value from *jar* for the given domain/path.

    A cookie matches when its domain suffix-matches *domain* (ignoring a
    leading dot) and its path is a prefix of *path*.
    """
    pairs: List[str] = []
    for c in jar.jar:
        if not c.name or c.value is None:
            continue
        dom = (c.domain or "").lstrip(".")
        if not dom:
            continue
        # Suffix-match in both directions so "example.com" matches
        # "www.example.com" cookies and vice versa.
        if not (domain == dom or domain.endswith("." + dom) or dom.endswith("." + domain)):
            continue
        if not path.startswith(c.path or "/"):
            continue
        pairs.append(f"{c.name}={c.value}")
    return "; ".join(pairs)


def _extract_magento_errors(html_text: str) -> list[str]:
    """Collect de-duplicated Magento error/warning/notice message texts from *html_text*.

    Best-effort: parse failures yield an empty list rather than raising.
    """
    msgs: list[str] = []
    try:
        soup = BeautifulSoup(html_text or "", "lxml")
        for sel in [
            ".message-error",
            ".messages .message-error",
            ".page.messages .message-error",
            "[data-ui-id='message-error']",
            ".message.warning",
            ".message.notice",
        ]:
            for box in soup.select(sel):
                t = " ".join((box.get_text(" ") or "").split())
                if t and t not in msgs:
                    msgs.append(t)
    except Exception:
        pass
    return msgs


def _looks_like_login_page(html_text: str) -> bool:
    """Heuristically detect the Magento customer-login page (form id or title)."""
    try:
        s = BeautifulSoup(html_text or "", "lxml")
        if s.select_one("form#login-form.form-login"):
            return True
        title = (s.title.get_text() if s.title else "").strip().lower()
        if "customer login" in title:
            return True
    except Exception:
        pass
    return False


def _chrome_headers(referer: Optional[str] = None, origin: Optional[str] = None) -> Dict[str, str]:
    """Chrome-like request headers; ``Referer``/``Origin`` added only when given."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    if referer:
        headers["Referer"] = referer
    if origin:
        headers["Origin"] = origin
    return headers


def _clear_cookie_everywhere(cookies: httpx.Cookies, name: str) -> None:
    """Remove every cookie named *name* from *cookies*, across all domain/path variants.

    ``CookieJar.clear`` requires an exact (domain, path, name) triple, and the
    stored domain may or may not carry a leading dot, so mismatches are retried
    with the normalized variant.
    """
    to_delete = []
    for c in list(cookies.jar):  # http.cookiejar.Cookie objects
        if c.name == name:
            to_delete.append((c.domain, c.path, c.name))
    for domain, path, nm in to_delete:
        try:
            cookies.jar.clear(domain, path, nm)
        except KeyError:
            # Leading-dot mismatch: retry with the normalized domain variant.
            try:
                if domain and domain.startswith("."):
                    cookies.jar.clear(domain.lstrip("."), path, nm)
                elif domain:
                    cookies.jar.clear("." + domain, path, nm)
            except KeyError:
                pass  # already gone under every variant we can name
    if name in cookies:
        del cookies[name]


async def login(
    username: str,
    password: str,
    *,
    extra_cookies: Optional[Dict[str, str]] = None,  # ok to pass cf_clearance etc., but NOT form_key
    timeout: float = 30.0,
) -> httpx.Cookies:
    """
    Attempt login and return an authenticated cookie jar.

    Success criteria (strict):
      1) /customer/section/load?sections=customer reports is_logged_in == True
         OR
      2) GET /customer/account/ resolves to an account page (not the login page).

    Otherwise raises LoginFailed with debug info.
    """
    limits = httpx.Limits(max_connections=10, max_keepalive_connections=6)

    # Seed consent cookies plus any caller-supplied extras; a caller-supplied
    # form_key is always dropped so we only ever use the freshly issued one.
    cookies = httpx.Cookies()
    seed = {
        **(extra_cookies or {}),
        "pr-cookie-consent": '["all"]',
        "user_allowed_save_cookie": '{"1":1}',
    }
    for k, v in seed.items():
        if k.lower() == "form_key":
            continue
        cookies.set(k, v, domain="wholesale.suma.coop", path="/")

    base_login = config()["base_login"]
    base_url = config()["base_url"]

    async with httpx.AsyncClient(
        follow_redirects=True,
        timeout=httpx.Timeout(timeout, connect=15.0),
        http2=True,
        limits=limits,
        cookies=cookies,
        headers=_chrome_headers(),
        trust_env=True,
    ) as client:
        # 1) GET login page for a fresh form_key.
        r_get = await client.get(base_login, headers=_chrome_headers())
        if r_get.status_code >= 400:
            # Only surface diagnostics on an actual failure.
            print("Login GET failed. Status:", r_get.status_code)
            print("Login GET URL:", r_get.url)
            print("Response text:", r_get.text[:1000])  # trim if long
        r_get.raise_for_status()

        soup = BeautifulSoup(r_get.text, "lxml")
        form = soup.select_one("form.form.form-login#login-form") or soup.select_one("#login-form")
        if not form:
            raise LoginFailed(
                "Login form not found (possible bot challenge or theme change).",
                debug={"get_status": r_get.status_code, "final_url": str(r_get.url)},
            )

        action = urljoin(base_login, form.get("action") or base_login)
        fk_el = form.find("input", attrs={"name": "form_key"})
        hidden_form_key = (fk_el.get("value") if fk_el else "") or ""
        # Mirror Magento behavior: form_key also appears as a cookie.
        client.cookies.set("form_key", hidden_form_key, domain="wholesale.suma.coop", path="/")

        payload = {
            "form_key": hidden_form_key,
            "login[username]": username,
            "login[password]": password,
            "send": "Login",
        }
        post_headers = _chrome_headers(referer=base_login, origin=base_url)
        post_headers["Content-Type"] = "application/x-www-form-urlencoded"
        # Hand-build the Cookie header so the POST carries exactly the cookies
        # scoped to the customer path.
        post_headers["Cookie"] = _cookie_header_from_jar(
            client.cookies, domain="wholesale.suma.coop", path="/customer/"
        )
        r_post = await client.post(action, data=payload, headers=post_headers)

        # 2) Primary check: sections API must say logged in.
        is_logged_in = False
        sections_url = "https://wholesale.suma.coop/customer/section/load/?sections=customer&force_new_section_timestamp=1"
        section_json: Dict[str, Any] = {}
        try:
            r_sec = await client.get(sections_url, headers=_chrome_headers(referer=base_login))
            if r_sec.status_code == 200:
                section_json = r_sec.json()
                cust = section_json.get("customer") or {}
                is_logged_in = bool(cust.get("is_logged_in"))
        except Exception:
            pass  # best-effort; fall through to the secondary check

        # 3) Secondary check: account page should NOT be the login page.
        looks_like_login = False
        final_account_url = ""
        try:
            r_acc = await client.get(
                "https://wholesale.suma.coop/customer/account/",
                headers=_chrome_headers(referer=base_login),
            )
            final_account_url = str(r_acc.url)
            looks_like_login = (
                "/customer/account/login" in final_account_url
                or _looks_like_login_page(r_acc.text)
            )
        except Exception:
            # ignore; we'll rely on section status
            pass

        # Decide success/failure strictly.
        if not (is_logged_in or (final_account_url and not looks_like_login)):
            errors = _extract_magento_errors(r_post.text)
            # Clean up transient form_key cookie before reporting failure.
            try:
                client.cookies.jar.clear("wholesale.suma.coop", "/", "form_key")
            except Exception:
                pass
            raise LoginFailed(
                errors[0] if errors else "Invalid username or password.",
                debug={
                    "get_status": r_get.status_code,
                    "post_status": r_post.status_code,
                    "post_final_url": str(r_post.url),
                    "sections_customer": section_json.get("customer"),
                    "account_final_url": final_account_url,
                    "looks_like_login_page": looks_like_login,
                },
            )

        # The form_key was only needed for the POST; never hand it back to callers.
        _clear_cookie_everywhere(client.cookies, "form_key")
        return client.cookies