import re
from bs4 import BeautifulSoup
import json
import os
from typing import Iterable, Union, List

from quart import request

def soup_of(html: str) -> BeautifulSoup:
    """Parse *html* into a BeautifulSoup tree using the "lxml" parser."""
    return BeautifulSoup(html, features="lxml")

def normalize_text(s: str) -> str:
    """Trim *s* and collapse every run of whitespace into a single space.

    A falsy *s* (``None``, ``""``) yields the empty string.
    """
    trimmed = (s or "").strip()
    return re.sub(r"\s+", " ", trimmed)

def log(msg: str) -> None:
    """Print *msg* to standard output and flush immediately."""
    # flush=True so the line is emitted right away instead of sitting in a buffer.
    print(msg, flush=True)

def ensure_dir(path: str) -> None:
    """Create directory *path*, including missing parents, if it does not exist.

    An already existing directory is left untouched.  An empty *path* is
    treated as "the current directory" and is a no-op: that is the value
    ``os.path.dirname()`` returns for a bare file name, and passing it to
    ``os.makedirs`` would raise ``FileNotFoundError``.
    """
    if path:
        os.makedirs(path, exist_ok=True)

def dump_json(path: str, data) -> None:
    """Serialize *data* as pretty-printed UTF-8 JSON to the file at *path*.

    The parent directory of *path* is created first when it is missing.
    Non-ASCII characters are written verbatim (``ensure_ascii=False``) and
    the output is indented by two spaces.
    """
    parent = os.path.dirname(path)
    # A bare file name has an empty dirname (the current directory); there is
    # nothing to create, and os.makedirs("") would raise FileNotFoundError.
    if parent:
        ensure_dir(parent)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def _join_url_parts(parts: List[str]) -> str:
    parts = [p for p in parts if p is not None and p != ""]
    if not parts:
        return ""

    # Preserve scheme like "https://"
    m = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]*://)(.*)$", parts[0])
    if m:
        scheme, first = m.group(1), m.group(2)
    else:
        scheme, first = "", parts[0]

    cleaned = [first.strip("/")]
    for seg in parts[1:]:
        seg = str(seg)
        # If a later segment is already an absolute URL, use it as the base
        m2 = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]*://)(.*)$", seg)
        if m2:
            scheme, first = m2.group(1), m2.group(2)
            cleaned = [first.strip("/")]
        elif seg.startswith("?") or seg.startswith("#"):
            cleaned[-1] = cleaned[-1] + seg  # attach query/fragment
        else:
            cleaned.append(seg.strip("/"))

    url = scheme + "/".join(s for s in cleaned if s != "")

    # Preserve trailing slash if caller's last segment had one (and isn't ? or #)
    last = str(parts[-1])
    if last.endswith("/") and not last.startswith(("?", "#")) and not url.endswith("/"):
        url += "/"

    return url

def hx_fragment_request() -> bool:
    """Return True when the current request has an ``SX-Request`` or
    ``HX-Request`` header whose value equals "true" (case-insensitive)."""
    headers = request.headers
    return any(
        headers.get(name, "").lower() == "true"
        for name in ("SX-Request", "HX-Request")
    )
def route_prefix() -> str:
    """Return ``scheme://host/<prefix>`` for the current request.

    ``<prefix>`` comes from the ``x-forwarded-prefix`` request header and
    defaults to ``/`` when the header is absent.  Leading slashes are removed
    from the prefix before it is appended, because the template already
    supplies the separator; otherwise the default ("/") or a value such as
    "/app" would produce a double slash ("http://host//app").
    """
    prefix = request.headers.get('x-forwarded-prefix', '/').lstrip('/')
    return f"{request.scheme}://{request.host}/{prefix}"

def join_url(value: Union[str, Iterable[str]]):
    """Join *value* into one URL string via ``_join_url_parts``.

    A single string is treated as a one-element sequence; any other iterable
    is materialized into a list of segments first.
    """
    segments = [value] if isinstance(value, str) else list(value)
    return _join_url_parts(segments)

def host_url(value: str='', no_slash=False):
    """
    Join route_prefix() with *value* and, unless *no_slash* is true, make sure
    the path part of the result ends with a slash. The slash is inserted
    before any query string or fragment, never after them.

    With no_slash=True the joined URL is returned unchanged.

    Effect of the slash step on the joined URL:
      http://jjj            -> http://jjj/
      http://jjj?hello      -> http://jjj/?hello
      http://jjj/foo?x=1#f  -> http://jjj/foo/?x=1#f
    """
    joined = join_url([route_prefix(), value])
    if no_slash:
        return joined

    # Split into: path part (no ?/#), optional ?query, optional #fragment.
    pieces = re.match(r'^(?P<base>[^?#]*)(?P<qs>\?[^#]*)?(?P<frag>#.*)?$', joined)
    if pieces is None:
        # Pattern did not apply; hand back the joined URL untouched.
        return joined

    path, query, fragment = (
        pieces.group(name) or "" for name in ("base", "qs", "frag")
    )

    # Only a non-empty path that lacks a trailing slash gets one appended.
    needs_slash = bool(path) and not path.endswith('/')
    if needs_slash:
        path = path + '/'

    return path + query + fragment