from __future__ import annotations

import io
import mimetypes
import re
import uuid
from pathlib import Path

import fitz
import numpy as np
import pytesseract
from fastapi import HTTPException, UploadFile, status
from fastapi.concurrency import run_in_threadpool
from PIL import Image, ImageFilter, ImageOps
from pypdf import PdfReader

from app.core.config import get_settings
from app.services.llm_client import request_llm_chat_completion
from app.services.tenant_store import MenuAssetRecord, SessionIdentity, get_tenant_store


# File suffixes (lower-case, with leading dot) recognised for each asset kind.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
TEXT_EXTENSIONS = {".txt", ".md", ".csv"}
PDF_EXTENSIONS = {".pdf"}
# Union of every recognised suffix across the three kinds above.
ALLOWED_EXTENSIONS = IMAGE_EXTENSIONS | TEXT_EXTENSIONS | PDF_EXTENSIONS


def normalize_asset_name(value: str | None, *, fallback: str = "materiale-menu") -> str:
    raw = Path(value or "").name.strip()
    normalized = re.sub(r"\s+", " ", raw)
    return normalized[:180] or fallback


def resolve_asset_mime_type(filename: str, content_type: str | None) -> str:
    normalized_type = (content_type or "").split(";", 1)[0].strip().lower()
    if normalized_type:
        return normalized_type

    guessed_type, _ = mimetypes.guess_type(filename)
    return (guessed_type or "application/octet-stream").lower()


def resolve_asset_kind(filename: str, mime_type: str) -> str:
    """Classify an upload as ``"pdf"``, ``"image"``, ``"text"`` or ``"other"``.

    Either the lower-cased file suffix or the MIME type is enough to select a
    kind; the kinds are checked in the order pdf, image, text.
    """
    extension = Path(filename).suffix.lower()

    looks_like_pdf = extension in PDF_EXTENSIONS or mime_type == "application/pdf"
    if looks_like_pdf:
        return "pdf"

    looks_like_image = extension in IMAGE_EXTENSIONS or mime_type.startswith("image/")
    if looks_like_image:
        return "image"

    looks_like_text = extension in TEXT_EXTENSIONS or mime_type.startswith("text/")
    return "text" if looks_like_text else "other"


def normalize_text_block(value: str) -> str:
    """Clean up whitespace in a block of extracted text.

    NUL characters become spaces, line endings are unified to ``\\n``,
    trailing blanks before a newline are removed, runs of two or more
    spaces/tabs collapse to one space, three or more consecutive newlines
    collapse to two, and the result is stripped.
    """
    text = value.replace("\x00", " ")
    # CRLF must be handled before lone CR so it does not become two newlines.
    for line_ending in ("\r\n", "\r"):
        text = text.replace(line_ending, "\n")

    substitutions = (
        (r"[ \t]+\n", "\n"),
        (r"[ \t]{2,}", " "),
        (r"\n{3,}", "\n\n"),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()


def truncate_text(value: str, limit: int) -> str:
    """Normalize ``value`` and cap it at ``limit`` characters.

    Text that fits is returned unchanged (after normalization). Longer text
    is cut to ``limit - 1`` characters, right-stripped, and suffixed with an
    ellipsis so the result never exceeds ``limit``.

    A non-positive ``limit`` yields an empty string; previously it produced a
    lone "…", which is longer than the requested limit.
    """
    cleaned = normalize_text_block(value)
    if limit <= 0:
        return ""
    if len(cleaned) <= limit:
        return cleaned
    return cleaned[: limit - 1].rstrip() + "…"


def _meaningful_text_lines(value: str, *, limit: int = 40) -> list[str]:
    """Pick the distinct, informative lines out of a block of extracted text.

    A line is kept when, after whitespace collapsing and trimming of leading
    or trailing separators, it has at least 4 characters and either contains
    a euro sign or has at least 4 alphanumeric characters and two words of
    three or more letters. Duplicates (case-insensitive) are dropped.
    Collection stops once ``limit`` lines have been gathered.
    """
    kept: list[str] = []
    already_seen: set[str] = set()

    for source_line in normalize_text_block(value).splitlines():
        candidate = re.sub(r"\s+", " ", source_line).strip(" \t-•|")
        if len(candidate) < 4:
            continue

        if "€" not in candidate:
            # Without a price marker the line must look like real wording.
            alnum_total = sum(1 for character in candidate if character.isalnum())
            words = re.findall(r"[A-Za-zÀ-ÿ]{3,}", candidate)
            if alnum_total < 4 or len(words) < 2:
                continue

        dedupe_key = candidate.casefold()
        if dedupe_key in already_seen:
            continue
        already_seen.add(dedupe_key)

        kept.append(candidate)
        if len(kept) >= limit:
            break

    return kept


def build_menu_asset_fallback_summary(*, display_name: str, kind: str, extracted_text: str) -> str:
    """Build a summary from the extracted text alone, without calling the LLM.

    When meaningful lines are found they are listed under a short header
    naming the source. Otherwise the truncated raw text is returned.

    Raises:
        ValueError: if there is no readable text at all.
    """
    settings = get_settings()
    labels_by_kind = {"image": "foto", "pdf": "pdf", "text": "testo"}
    source_label = labels_by_kind.get(kind, "file")
    max_chars = settings.menu_asset_analysis_max_chars

    useful_lines = _meaningful_text_lines(extracted_text, limit=28)
    if useful_lines:
        header = [
            f"Fonte: {display_name} ({source_label}).",
            "Testo utile estratto dal materiale caricato:",
        ]
        return truncate_text("\n".join(header + useful_lines), max_chars)

    plain_text = truncate_text(extracted_text, max_chars)
    if plain_text:
        return plain_text
    raise ValueError("Nessun testo leggibile estratto dal file.")


def _default_suffix(kind: str) -> str:
    if kind == "pdf":
        return ".pdf"
    if kind == "image":
        return ".png"
    if kind == "text":
        return ".txt"
    return ""


def _decode_text_file(raw_bytes: bytes) -> str:
    for encoding in ("utf-8-sig", "utf-8", "latin-1"):
        try:
            return raw_bytes.decode(encoding)
        except UnicodeDecodeError:
            continue
    return raw_bytes.decode("utf-8", errors="ignore")


def _resize_for_ocr(image: Image.Image, *, max_side: int = 2200) -> Image.Image:
    """Downscale ``image`` so its longest side is at most ``max_side`` pixels.

    Images already within the bound are returned as-is. Larger ones are
    resized with LANCZOS resampling, keeping the aspect ratio and never
    letting a dimension drop below one pixel.
    """
    longest = max(image.size)
    if longest <= max_side:
        return image

    ratio = max_side / float(longest)
    original_width, original_height = image.size
    target_size = (
        max(1, int(original_width * ratio)),
        max(1, int(original_height * ratio)),
    )
    return image.resize(target_size, resample=Image.Resampling.LANCZOS)


def _safe_osd_rotation(image: Image.Image) -> int:
    """Return the rotation (degrees, modulo 360) reported by Tesseract OSD.

    Any failure — the OSD call raising, or its output lacking a parsable
    ``Rotate:`` entry — results in ``0``.
    """
    try:
        osd_report = pytesseract.image_to_osd(image, config="--psm 0")
    except Exception:
        # Orientation detection is best-effort; treat failures as "upright".
        return 0

    rotate_entry = re.search(r"Rotate:\s+(\d+)", osd_report)
    if rotate_entry is None:
        return 0

    try:
        degrees = int(rotate_entry.group(1))
    except ValueError:
        return 0
    return degrees % 360


def _otsu_threshold(array: np.ndarray) -> int:
    histogram, _ = np.histogram(array.ravel(), bins=256, range=(0, 256))
    total = array.size
    sum_total = float(np.dot(np.arange(256), histogram))
    sum_background = 0.0
    weight_background = 0.0
    best_threshold = 127
    best_variance = -1.0

    for threshold, count in enumerate(histogram):
        weight_background += float(count)
        if weight_background == 0.0:
            continue

        weight_foreground = total - weight_background
        if weight_foreground == 0.0:
            break

        sum_background += threshold * float(count)
        mean_background = sum_background / weight_background
        mean_foreground = (sum_total - sum_background) / weight_foreground
        variance = weight_background * weight_foreground * (mean_background - mean_foreground) ** 2

        if variance > best_variance:
            best_variance = variance
            best_threshold = threshold

    return best_threshold


def _projection_score(image: Image.Image, angle: float) -> float:
    """Score how well text rows line up when ``image`` is rotated by ``angle``.

    The image is rotated (white fill), binarized with its Otsu threshold and
    the variance of the per-row dark-pixel counts is returned: sharply
    separated text lines give a high variance. ``-1.0`` is returned when the
    rotated image has no dark pixels.
    """
    rotated = image.rotate(angle, resample=Image.Resampling.BICUBIC, expand=True, fillcolor=255)
    array = np.asarray(rotated)
    threshold = _otsu_threshold(array)
    # _otsu_threshold places pixels *at* the threshold in the dark class, so
    # the comparison must be inclusive. A strict "<" dropped them, which left
    # strictly two-tone images with an empty mask and a score of -1.
    binary = (array <= threshold).astype(np.uint8)
    if binary.sum() == 0:
        return -1.0
    return float(np.var(binary.sum(axis=1)))


def _best_skew_angle(image: Image.Image) -> float:
    """Estimate the small rotation that best aligns text rows horizontally.

    Works on a preview capped at 1100 px: a coarse pass tries whole degrees
    from -12 to +12, then a fine pass tries quarter-degree steps within one
    degree of the coarse winner. Each angle is ranked by ``_projection_score``.
    """
    thumbnail = _resize_for_ocr(image, max_side=1100)

    def score_for(candidate_angle) -> float:
        return _projection_score(thumbnail, float(candidate_angle))

    rough_best = max(np.arange(-12.0, 12.1, 1.0), key=score_for)
    refined_candidates = np.arange(rough_best - 1.0, rough_best + 1.01, 0.25)
    return float(max(refined_candidates, key=score_for))


def _prepare_image_for_ocr(image: Image.Image, *, cardinal_rotation: int = 0) -> Image.Image:
    """Produce a deskewed, contrast-enhanced greyscale image for OCR.

    Steps: greyscale conversion, downscaling, autocontrast, optional rotation
    by ``-cardinal_rotation`` degrees, skew correction (applied only when the
    estimated skew is at least 0.35 degrees), a second autocontrast and a
    sharpening filter.
    """
    working = ImageOps.autocontrast(_resize_for_ocr(image.convert("L")))

    if cardinal_rotation:
        working = working.rotate(
            -cardinal_rotation,
            resample=Image.Resampling.BICUBIC,
            expand=True,
            fillcolor=255,
        )

    skew = _best_skew_angle(working)
    if abs(skew) >= 0.35:
        working = working.rotate(
            skew,
            resample=Image.Resampling.BICUBIC,
            expand=True,
            fillcolor=255,
        )

    return ImageOps.autocontrast(working).filter(ImageFilter.SHARPEN)


def _prepare_image_for_layout_ocr(image: Image.Image, *, cardinal_rotation: int = 0) -> Image.Image:
    """Produce a lightly processed greyscale image for layout-aware OCR.

    Only greyscale conversion, downscaling, autocontrast and the optional
    rotation by ``-cardinal_rotation`` degrees are applied; no deskewing or
    sharpening.
    """
    working = ImageOps.autocontrast(_resize_for_ocr(image.convert("L")))
    if not cardinal_rotation:
        return working
    return working.rotate(
        -cardinal_rotation,
        resample=Image.Resampling.BICUBIC,
        expand=True,
        fillcolor=255,
    )


def _average_ocr_confidence(image: Image.Image, *, languages: str, config: str) -> float:
    """Return the mean per-entry confidence Tesseract reports for ``image``.

    Entries whose confidence is ``-1``, empty or missing are ignored. ``0.0``
    is returned when the OCR call fails or no usable confidence is present.
    """
    try:
        ocr_data = pytesseract.image_to_data(
            image,
            lang=languages,
            config=config,
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        # Confidence is only a ranking aid; a failed call counts as zero.
        return 0.0

    ignored_markers = ("-1", -1, "", None)
    usable: list[float] = []
    for raw_value in ocr_data.get("conf", []):
        if raw_value not in ignored_markers:
            usable.append(float(raw_value))

    return sum(usable) / len(usable) if usable else 0.0


def _score_ocr_text(value: str, *, confidence: float = 0.0) -> tuple[float, int, int, int, int]:
    """Build a sortable quality score for an OCR result.

    The tuple is ``(confidence, words of 3+ letters, alphanumeric characters,
    ASCII alphanumeric tokens, text length)``; tuples compare left to right.
    Empty text scores ``(-1.0, 0, 0, 0, 0)``.
    """
    cleaned = normalize_text_block(value)
    if not cleaned:
        return (-1.0, 0, 0, 0, 0)

    wordish_total = len(re.findall(r"[A-Za-zÀ-ÿ]{3,}", cleaned))
    alnum_total = sum(1 for character in cleaned if character.isalnum())
    token_total = len(re.findall(r"[A-Za-z0-9]+", cleaned))
    return (confidence, wordish_total, alnum_total, token_total, len(cleaned))


def _extract_best_ocr_candidate(
    image: Image.Image,
    *,
    languages: str,
    configs: tuple[str, ...],
) -> tuple[str, tuple[float, int, int, int, int], str]:
    """Run OCR once per Tesseract config and keep the best-scoring result.

    Returns ``(text, score, config)`` for the winner. When every config
    yields empty text the result is ``("", (-1.0, 0, 0, 0, 0), configs[0])``.
    """
    winner_text = ""
    winner_score = (-1.0, 0, 0, 0, 0)
    winner_config = configs[0]

    for candidate_config in configs:
        raw_text = pytesseract.image_to_string(image, lang=languages, config=candidate_config)
        candidate_text = normalize_text_block(raw_text)

        # The confidence pass is a second OCR run, so skip it for empty text.
        if candidate_text:
            candidate_confidence = _average_ocr_confidence(
                image, languages=languages, config=candidate_config
            )
        else:
            candidate_confidence = 0.0

        candidate_score = _score_ocr_text(candidate_text, confidence=candidate_confidence)
        if candidate_score > winner_score:
            winner_text, winner_score, winner_config = (
                candidate_text,
                candidate_score,
                candidate_config,
            )

    return winner_text, winner_score, winner_config


def _clean_joined_ocr_text(parts: list[str]) -> str:
    text = " ".join(parts)
    text = re.sub(r"\s+([,.;:!?])", r"\1", text)
    text = re.sub(r"([€(])\s+", r"\1", text)
    text = re.sub(r"\s+\)", ")", text)
    return text.strip()


def _classify_layout_segment(text: str) -> str:
    if re.search(r"€\s*\d", text):
        return "PRICE"

    letters = [character for character in text if character.isalpha()]
    if not letters:
        return "DETAIL"

    upper_ratio = sum(character.isupper() for character in letters) / len(letters)
    wordish_tokens = re.findall(r"[A-Za-zÀ-ÿ]{3,}", text)
    if "." not in text and len(wordish_tokens) <= 5 and upper_ratio >= 0.45:
        return "TITLE"
    return "DETAIL"


def _build_layout_ocr_text(image: Image.Image, *, languages: str, config: str) -> str:
    """Render Tesseract word boxes as coordinate-annotated rows of text.

    Words are grouped into rows by Tesseract's (block, paragraph, line)
    numbers, each row is split into segments at wide horizontal gaps, and each
    segment is labelled via ``_classify_layout_segment``. The output lists one
    ``ROW y=<top>: [x=<left> <LABEL>] text | ...`` line per row, ordered by
    ``top``. When ``_build_card_layout_text`` produces a card reconstruction,
    that text is placed before the rows.

    Returns an empty string when the OCR call fails or no usable word remains.
    """
    try:
        data = pytesseract.image_to_data(
            image,
            lang=languages,
            config=config,
            output_type=pytesseract.Output.DICT,
        )
    except Exception:
        # Layout text is an optional enrichment: any OCR failure means "none".
        return ""

    # Words grouped by (block_num, par_num, line_num), i.e. one entry per text line.
    rows: dict[tuple[int, int, int], list[dict[str, float | int | str]]] = {}
    total_items = len(data.get("text", []))
    for index in range(total_items):
        text = str(data["text"][index]).strip()
        raw_confidence = str(data["conf"][index]).strip()
        # Empty or "-1" confidence values are treated as -1.0.
        confidence = float(raw_confidence) if raw_confidence not in ("", "-1") else -1.0
        if not text:
            continue

        alnum_count = sum(character.isalnum() for character in text)
        # Drop low-confidence fragments unless they carry a euro sign or have
        # at least three alphanumeric characters.
        if confidence < 10.0 and "€" not in text and alnum_count < 3:
            continue

        key = (
            int(data["block_num"][index]),
            int(data["par_num"][index]),
            int(data["line_num"][index]),
        )
        rows.setdefault(key, []).append(
            {
                "text": text,
                "left": int(data["left"][index]),
                "top": int(data["top"][index]),
                "width": int(data["width"][index]),
                "height": int(data["height"][index]),
            }
        )

    if not rows:
        return ""

    # layout_rows feeds the card reconstruction; rendered_rows is the flat listing.
    layout_rows: list[dict[str, object]] = []
    rendered_rows: list[tuple[int, str]] = []
    for words in rows.values():
        sorted_words = sorted(words, key=lambda item: int(item["left"]))
        average_height = sum(int(item["height"]) for item in sorted_words) / len(sorted_words)
        # A horizontal gap wider than this starts a new segment within the row.
        gap_threshold = max(44, int(average_height * 2.8))
        segments: list[list[dict[str, float | int | str]]] = []

        current_segment: list[dict[str, float | int | str]] = []
        # Right edge of the previously visited word.
        current_right = 0
        for word in sorted_words:
            left = int(word["left"])
            width = int(word["width"])
            if current_segment and left - current_right > gap_threshold:
                segments.append(current_segment)
                current_segment = []
            current_segment.append(word)
            current_right = left + width
        if current_segment:
            segments.append(current_segment)

        rendered_segments: list[str] = []
        row_segments: list[dict[str, object]] = []
        # The row's vertical position is its topmost word.
        top = min(int(item["top"]) for item in sorted_words)
        for segment in segments:
            left = min(int(item["left"]) for item in segment)
            segment_text = _clean_joined_ocr_text([str(item["text"]) for item in segment])
            if not segment_text:
                continue
            label = _classify_layout_segment(segment_text)
            rendered_segments.append(f"[x={left} {label}] {segment_text}")
            row_segments.append({"left": left, "label": label, "text": segment_text})

        if rendered_segments:
            layout_rows.append({"top": top, "segments": row_segments})
            rendered_rows.append((top, f"ROW y={top}: " + " | ".join(rendered_segments)))

    # Present rows top-to-bottom.
    rendered_rows.sort(key=lambda item: item[0])
    rendered_rows_text = "\n".join(row for _, row in rendered_rows)
    card_layout_text = _build_card_layout_text(layout_rows)
    if card_layout_text and rendered_rows_text:
        return f"{card_layout_text}\n\n{rendered_rows_text}"
    return card_layout_text or rendered_rows_text


def _build_card_layout_text(layout_rows: list[dict[str, object]]) -> str:
    """Reconstruct side-by-side menu "cards" from labelled layout rows.

    Each entry of ``layout_rows`` is a dict with ``"top"`` and ``"segments"``
    (dicts with ``"left"``, ``"label"`` and ``"text"``), as built by
    ``_build_layout_ocr_text``.

    Rows holding two or more TITLE segments and no PRICE form title bands.
    For each band, column anchors are derived from the x positions of its
    titles and of the prices found below it, and every segment from the band
    down to the next band is assigned to the nearest anchor. Cards are
    rendered as ``CARD`` / ``TITLE`` / ``DETAIL`` / ``PRICE`` lines.

    Returns an empty string when no band or no usable card is found.
    """
    if not layout_rows:
        return ""

    sorted_rows = sorted(layout_rows, key=lambda row: int(row["top"]))
    candidate_bands: list[dict[str, object]] = []
    current_band: dict[str, object] | None = None

    for row in sorted_rows:
        segments = row["segments"]
        title_segments = [segment for segment in segments if segment["label"] == "TITLE"]
        price_segments = [segment for segment in segments if segment["label"] == "PRICE"]
        # Only rows with several titles and no price can be part of a title band.
        if len(title_segments) < 2 or price_segments:
            continue

        row_top = int(row["top"])
        # A title row more than 54 units below the band's last title row opens a new band.
        if current_band is None or row_top - int(current_band["last_top"]) > 54:
            current_band = {"start_top": row_top, "last_top": row_top, "rows": [row]}
            candidate_bands.append(current_band)
        else:
            current_band["last_top"] = row_top
            current_band["rows"].append(row)

    # Keep only bands that contain at least three TITLE segments in total.
    candidate_bands = [
        band
        for band in candidate_bands
        if sum(1 for row in band["rows"] for segment in row["segments"] if segment["label"] == "TITLE") >= 3
    ]
    if not candidate_bands:
        return ""

    rendered_cards: list[str] = []
    for band_index, band in enumerate(candidate_bands):
        band_rows = band["rows"]
        title_segments = [segment for row in band_rows for segment in row["segments"] if segment["label"] == "TITLE"]
        # Rows with a PRICE located below this band and above the next band (if any).
        price_rows = [
            row
            for row in sorted_rows
            if int(row["top"]) > int(band["last_top"])
            and (
                band_index == len(candidate_bands) - 1
                or int(row["top"]) < int(candidate_bands[band_index + 1]["start_top"])
            )
            and any(segment["label"] == "PRICE" for segment in row["segments"])
        ]
        price_segments = [segment for row in price_rows for segment in row["segments"] if segment["label"] == "PRICE"]
        # Only x positions below 1600 are used as column anchors.
        anchor_positions = sorted(
            int(segment["left"]) for segment in title_segments + price_segments if int(segment["left"]) < 1600
        )
        if not anchor_positions:
            continue

        # Cluster sorted x positions: a jump of more than 120 starts a new column.
        clustered_positions: list[list[int]] = []
        for left in anchor_positions:
            if not clustered_positions or left - clustered_positions[-1][-1] > 120:
                clustered_positions.append([left])
            else:
                clustered_positions[-1].append(left)

        # One card per column, anchored at the rounded mean x of its cluster.
        cards = [
            {
                "anchor_left": int(round(sum(cluster) / len(cluster))),
                "title_parts": [],
                "detail_parts": [],
                "prices": [],
            }
            for cluster in clustered_positions
        ]

        # All rows from the start of this band up to the start of the next band.
        section_rows = [
            row
            for row in sorted_rows
            if int(row["top"]) >= int(band["start_top"])
            and (
                band_index == len(candidate_bands) - 1
                or int(row["top"]) < int(candidate_bands[band_index + 1]["start_top"])
            )
        ]

        for row in section_rows:
            for segment in row["segments"]:
                left = int(segment["left"])
                # Assign the segment to the card whose anchor is horizontally closest.
                card = min(cards, key=lambda candidate: abs(int(candidate["anchor_left"]) - left))
                if segment["label"] == "TITLE":
                    card["title_parts"].append(str(segment["text"]))
                elif segment["label"] == "PRICE":
                    card["prices"].append(str(segment["text"]))
                else:
                    card["detail_parts"].append(str(segment["text"]))

        for card in cards:
            title = normalize_text_block(" ".join(card["title_parts"]))
            # Drop empty details and a fixed set of single-token noise strings.
            details = [
                item
                for item in (normalize_text_block(detail) for detail in card["detail_parts"])
                if item and item not in {"|", "||", ";", "3", "~", "n", "i", "4"}
            ]
            prices = [normalize_text_block(price) for price in card["prices"] if normalize_text_block(price)]
            # Skip cards without a title or whose title contains "LIST" (case-insensitive).
            if not title or "LIST" in title.upper():
                continue
            # A title with neither details nor prices is not rendered.
            if not details and not prices:
                continue

            rendered_cards.append(
                "\n".join(
                    [
                        f"CARD x={card['anchor_left']}",
                        f"TITLE: {title}",
                        *(f"DETAIL: {detail}" for detail in details),
                        *(f"PRICE: {price}" for price in prices),
                    ]
                )
            )

    if not rendered_cards:
        return ""
    return "OCR ricostruito per card:\n" + "\n\n".join(rendered_cards)


def _image_to_ocr_text(image: Image.Image) -> str:
    """Run OCR on ``image`` and return the best text found.

    The image is EXIF-transposed, then each cardinal rotation is tried (the
    one suggested by Tesseract OSD first). For every rotation two
    preparations are OCR'd — the light "layout" one and the deskewed one —
    and the highest-scoring result wins. When a coordinate-annotated layout
    can be built for the winning image it is placed before the linear text.
    """
    settings = get_settings()
    upright = ImageOps.exif_transpose(image)
    detected_rotation = _safe_osd_rotation(upright)

    # OSD suggestion first, then the remaining cardinal angles, without repeats.
    cardinal_angles = {0, 90, 180, 270}
    rotations = list(
        dict.fromkeys(angle for angle in (detected_rotation, 0, 90, 180, 270) if angle in cardinal_angles)
    )

    # Each pass pairs an image preparation with the Tesseract configs to try.
    passes = (
        (_prepare_image_for_layout_ocr, ("--psm 4", "--psm 6")),
        (_prepare_image_for_ocr, ("--psm 6", "--psm 11")),
    )

    winner_text = ""
    winner_score = (-1.0, 0, 0, 0, 0)
    winner_config = "--psm 4"
    winner_image: Image.Image | None = None

    for rotation in rotations:
        for prepare, pass_configs in passes:
            candidate_image = prepare(upright, cardinal_rotation=rotation)
            candidate_text, candidate_score, candidate_config = _extract_best_ocr_candidate(
                candidate_image,
                languages=settings.menu_asset_ocr_languages,
                configs=pass_configs,
            )
            if candidate_score > winner_score:
                winner_text = candidate_text
                winner_score = candidate_score
                winner_config = candidate_config
                winner_image = candidate_image

    if winner_image is None:
        return winner_text

    structured_text = _build_layout_ocr_text(
        winner_image,
        languages=settings.menu_asset_ocr_languages,
        config=winner_config,
    )
    if not structured_text:
        return winner_text

    combined_sections = [
        "OCR strutturato con coordinate:",
        structured_text,
        "",
        "OCR lineare:",
        winner_text,
    ]
    return normalize_text_block("\n".join(combined_sections))


def extract_text_from_image(path: Path) -> str:
    """Open the image stored at ``path`` and return its normalized OCR text."""
    with Image.open(path) as opened_image:
        ocr_output = _image_to_ocr_text(opened_image)
    return normalize_text_block(ocr_output)


def extract_text_from_text_file(path: Path) -> str:
    """Read the file at ``path``, decode it and return the normalized text."""
    decoded = _decode_text_file(path.read_bytes())
    return normalize_text_block(decoded)


def extract_text_from_pdf(path: Path) -> str:
    """Extract text from a PDF, falling back to OCR for scanned documents.

    The embedded text layer is read first, limited to the configured number
    of pages and characters. When it yields at least 160 characters it is
    returned directly. Otherwise each page is rendered at 2x scale and OCR'd.

    If OCR produces nothing, the (short) text layer is returned instead of an
    empty string, so a small but valid text-only PDF is no longer discarded.
    """
    settings = get_settings()
    max_pages = settings.menu_asset_pdf_max_pages
    max_chars = settings.menu_asset_extracted_text_max_chars
    # Below this many characters the text layer is considered too thin to trust.
    min_text_layer_chars = 160

    reader = PdfReader(str(path))
    chunks: list[str] = []
    for page_index in range(min(len(reader.pages), max_pages)):
        try:
            page_text = reader.pages[page_index].extract_text() or ""
        except Exception:
            # A single unreadable page must not abort the whole document.
            page_text = ""
        if page_text.strip():
            chunks.append(page_text)
        if len("\n\n".join(chunks)) >= max_chars:
            break

    extracted = truncate_text("\n\n".join(chunks), max_chars)
    if len(extracted) >= min_text_layer_chars:
        return extracted

    ocr_chunks: list[str] = []
    document = fitz.open(str(path))
    try:
        for page_index in range(min(document.page_count, max_pages)):
            page = document.load_page(page_index)
            pixmap = page.get_pixmap(matrix=fitz.Matrix(2, 2), alpha=False)
            with Image.open(io.BytesIO(pixmap.tobytes("png"))) as image:
                ocr_text = _image_to_ocr_text(image)
            if ocr_text.strip():
                ocr_chunks.append(ocr_text)
            if len("\n\n".join(ocr_chunks)) >= max_chars:
                break
    finally:
        document.close()

    ocr_result = truncate_text("\n\n".join(ocr_chunks), max_chars)
    # Prefer the OCR result, but never throw away a non-empty text layer.
    return ocr_result or extracted


def extract_menu_asset_text(path: Path, kind: str) -> str:
    """Extract text from the stored asset using the extractor for its kind.

    Raises:
        ValueError: if ``kind`` is not ``"pdf"``, ``"image"`` or ``"text"``.
    """
    if kind not in ("pdf", "image", "text"):
        raise ValueError(f"Tipo file non supportato: {kind}")

    extractors = {
        "pdf": extract_text_from_pdf,
        "image": extract_text_from_image,
        "text": extract_text_from_text_file,
    }
    return extractors[kind](path)


async def summarize_menu_asset(
    *,
    venue_name: str,
    display_name: str,
    kind: str,
    extracted_text: str,
) -> str:
    """Summarize extracted menu text for use in the concierge prompt.

    The text is clipped to the configured length and sent to the LLM. When
    the LLM call raises ``HTTPException`` or returns an empty reply, the
    summary is built locally by ``build_menu_asset_fallback_summary``.

    Raises:
        ValueError: if there is no readable text to summarize.
    """
    settings = get_settings()
    clipped_text = truncate_text(extracted_text, settings.menu_asset_extracted_text_max_chars)
    if not clipped_text:
        raise ValueError("Nessun testo leggibile estratto dal file.")

    system_prompt = (
        "Trasforma il materiale grezzo del locale in una sintesi operativa per il prompt del concierge menu. "
        "Rispondi in italiano. Usa solo dati espliciti nel testo. Non inventare prezzi, piatti, drink, "
        "ingredienti o allergeni. Se nel testo compaiono blocchi 'OCR ricostruito per card', trattali come "
        "la fonte principale: ogni CARD corrisponde a una voce del menu, con TITLE, DETAIL e PRICE gia "
        "raggruppati. Usa le righe OCR con coordinate solo come supporto secondario. Non trasformare gli "
        "ingredienti in drink separati. Ignora intestazioni generali, brand, parole isolate e card "
        "palesemente corrotte o inaffidabili. Quando possibile, restituisci le bevande come voci complete "
        "nel formato 'Nome drink: ingredienti/note principali. Prezzo: ...'. Restituisci solo una sintesi "
        "pronta da incorporare in un prompt, senza preamboli e senza markdown."
    )
    user_prompt = (
        f"Locale: {venue_name}\n"
        f"File: {display_name}\n"
        f"Tipo: {kind}\n\n"
        "Organizza la sintesi in questo ordine, solo se i dati esistono davvero: "
        "offerta food, offerta drink, prezzi, allergeni/note, particolarita commerciali.\n\n"
        f"Testo estratto:\n{clipped_text}"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        reply, _ = await request_llm_chat_completion(messages, temperature=0.1)
        llm_summary = truncate_text(reply, settings.menu_asset_analysis_max_chars)
        if llm_summary:
            return llm_summary
    except HTTPException:
        # LLM unavailable: fall through to the locally built summary.
        pass

    return build_menu_asset_fallback_summary(
        display_name=display_name,
        kind=kind,
        extracted_text=clipped_text,
    )


async def build_menu_assets_context(venue_name: str, assets: list[MenuAssetRecord]) -> str:
    """Merge the analyses of all ready assets into one prompt context.

    Assets that are not ``"ready"`` or have a blank analysis are ignored; an
    empty string is returned when none remain. The merged sources are sent to
    the LLM for unification. When the call raises ``HTTPException`` or the
    reply is empty, the truncated merged sources are returned instead.
    """
    settings = get_settings()

    source_sections = []
    for asset in assets:
        if asset.status != "ready" or not asset.analysis_text.strip():
            continue
        analysis = truncate_text(asset.analysis_text, settings.menu_asset_analysis_max_chars)
        source_sections.append(f"Fonte: {asset.display_name}\n{analysis}")
    if not source_sections:
        return ""

    merged_sources = "\n\n".join(source_sections)
    clipped_sources = truncate_text(merged_sources, settings.menu_asset_context_max_chars)

    system_prompt = (
        "Unifica i materiali del locale in un unico contesto breve e affidabile per il prompt del concierge menu. "
        "Rispondi in italiano. Usa solo dati presenti nelle fonti. Niente markdown, niente introduzioni. "
        "Mantieni uno stile sintetico e pronto da inserire in un system prompt."
    )
    user_prompt = (
        f"Locale: {venue_name}\n\n"
        "Unifica le fonti in una sola sintesi utile al menu assistant. "
        "Se due fonti sembrano in conflitto, segnalalo invece di scegliere arbitrariamente.\n\n"
        f"{clipped_sources}"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        reply, _ = await request_llm_chat_completion(messages, temperature=0.1)
        unified = truncate_text(reply, settings.menu_asset_context_max_chars)
        if unified:
            return unified
    except HTTPException:
        # LLM unavailable: fall back to the raw merged sources.
        pass

    return clipped_sources


async def ingest_menu_asset(session: SessionIdentity, upload: UploadFile, *, venue_name: str | None = None) -> MenuAssetRecord:
    """Validate, store and analyse an uploaded menu asset for a tenant.

    The upload is classified by name and MIME type, size-checked, written to
    the tenant's asset directory and registered with status ``"processing"``.
    Text extraction and summarization then run; the record is updated to
    ``"ready"`` on success or to ``"error"`` (with a short detail) on failure.

    Args:
        session: identity supplying ``tenant_id`` and ``tenant_name``.
        upload: the uploaded file.
        venue_name: name used in the summary prompt; defaults to the tenant name.

    Returns:
        The updated asset record, in either ``"ready"`` or ``"error"`` state.

    Raises:
        HTTPException: 415 for unsupported formats, 400 for empty files,
            413 when the file exceeds the configured size limit.
    """
    settings = get_settings()
    store = get_tenant_store()
    display_name = normalize_asset_name(upload.filename)
    mime_type = resolve_asset_mime_type(display_name, upload.content_type)
    kind = resolve_asset_kind(display_name, mime_type)

    if kind == "other":
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=f"Formato non supportato per {display_name}. Carica PDF, immagini o file di testo.",
        )

    max_upload_bytes = settings.menu_asset_max_upload_bytes
    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large file into memory.
    raw_bytes = await upload.read(max(max_upload_bytes, 0) + 1)
    if not raw_bytes:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"{display_name}: file vuoto.")

    if len(raw_bytes) > max_upload_bytes:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"{display_name}: supera il limite di {max_upload_bytes // (1024 * 1024)} MB.",
        )

    asset_id = f"menu_asset_{uuid.uuid4().hex}"
    suffix = Path(display_name).suffix.lower() or _default_suffix(kind)
    storage_dir = store.menu_assets_directory(session.tenant_id)
    storage_path = storage_dir / f"{asset_id}{suffix}"
    storage_path.write_bytes(raw_bytes)

    record = store.create_menu_asset(
        asset_id=asset_id,
        tenant_id=session.tenant_id,
        original_name=display_name,
        display_name=display_name,
        mime_type=mime_type,
        kind=kind,
        file_size_bytes=len(raw_bytes),
        storage_path=str(storage_path),
        status="processing",
    )

    extracted_text = ""
    try:
        # Extraction (PDF parsing, repeated Tesseract runs) is synchronous and
        # CPU-heavy; run it in the threadpool so the event loop stays responsive.
        extracted_text = await run_in_threadpool(extract_menu_asset_text, storage_path, kind)
        if not extracted_text:
            raise ValueError("Nessun testo leggibile estratto dal file.")
        analysis_text = await summarize_menu_asset(
            venue_name=venue_name or session.tenant_name,
            display_name=display_name,
            kind=kind,
            extracted_text=extracted_text,
        )
        return store.update_menu_asset(
            session.tenant_id,
            asset_id,
            extracted_text=extracted_text,
            analysis_text=analysis_text,
            status="ready",
        )
    except Exception as exc:
        # Any failure is recorded on the asset rather than propagated, so the
        # caller always gets a record back once the file has been stored.
        if isinstance(exc, HTTPException):
            raw_detail = exc.detail if isinstance(exc.detail, str) else "Analisi del file non riuscita."
        else:
            raw_detail = str(exc)
        detail = normalize_text_block(raw_detail) or "Analisi del file non riuscita."
        return store.update_menu_asset(
            session.tenant_id,
            asset_id,
            extracted_text=truncate_text(extracted_text, settings.menu_asset_extracted_text_max_chars),
            analysis_text="",
            status="error",
            error_detail=truncate_text(detail, 500),
        )
