import hashlib
from pathlib import Path

from .config import settings
from .db import get_connection
from .services.pdf_parser import parse_pdf


def file_sha256(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is consumed in fixed-size binary chunks so arbitrarily
    large files can be hashed without loading them fully into memory.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while chunk := stream.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest()


def upsert_excursion_from_pdf(pdf_path: Path) -> None:
    """Parse one excursion PDF and upsert its metadata into the database.

    Writes one row to ``source_documents`` and one row to ``excursions``,
    both via ``INSERT ... ON DUPLICATE KEY UPDATE`` so re-running the
    ingestion on the same file updates rather than duplicates.

    Args:
        pdf_path: Path to the PDF file to ingest.

    Raises:
        RuntimeError: If the document id cannot be resolved after the upsert.

    NOTE(review): the upsert semantics assume a unique index on
    ``source_documents.filename`` and (for the second statement) on
    ``excursions.source_document_id`` -- confirm against the schema.
    """
    parsed = parse_pdf(pdf_path)
    pdf_hash = file_sha256(pdf_path)
    file_size = pdf_path.stat().st_size

    # Upsert the document row keyed by filename; all other columns are
    # refreshed from the latest parse.
    # NOTE(review): VALUES() inside ON DUPLICATE KEY UPDATE is deprecated
    # as of MySQL 8.0.20 (still functional; the row-alias syntax is the
    # documented replacement) -- revisit when the minimum MySQL version allows.
    sql_doc = """
    INSERT INTO source_documents (
      filename,
      sha256,
      file_size_bytes,
      page_count_estimated,
      filename_date,
      filename_title,
      text_layer_detected,
      extraction_status,
      raw_text_excerpt
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      sha256 = VALUES(sha256),
      file_size_bytes = VALUES(file_size_bytes),
      page_count_estimated = VALUES(page_count_estimated),
      filename_date = VALUES(filename_date),
      filename_title = VALUES(filename_title),
      text_layer_detected = VALUES(text_layer_detected),
      extraction_status = VALUES(extraction_status),
      raw_text_excerpt = VALUES(raw_text_excerpt)
    """

    # Re-select the id by filename rather than relying on cursor.lastrowid:
    # with ON DUPLICATE KEY UPDATE, lastrowid is not reliable on the
    # update path, so an explicit lookup is the safe way to get the id.
    sql_doc_id = "SELECT id FROM source_documents WHERE filename = %s"

    # Upsert the excursion row linked to the document resolved above.
    sql_excursion = """
    INSERT INTO excursions (
      source_document_id,
      title,
      excursion_date,
      start_time_text,
      start_temperature_c,
      car_distance_km,
      min_altitude_m,
      max_altitude_m,
      summary
    ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
      title = VALUES(title),
      excursion_date = VALUES(excursion_date),
      start_time_text = VALUES(start_time_text),
      start_temperature_c = VALUES(start_temperature_c),
      car_distance_km = VALUES(car_distance_km),
      min_altitude_m = VALUES(min_altitude_m),
      max_altitude_m = VALUES(max_altitude_m),
      summary = VALUES(summary)
    """

    with get_connection() as conn:
        with conn.cursor() as cursor:
            cursor.execute(
                sql_doc,
                (
                    pdf_path.name,
                    pdf_hash,
                    file_size,
                    parsed.page_count_estimated,
                    parsed.excursion_date,
                    parsed.title,
                    # Store the boolean flag as a 0/1 tinyint-style value.
                    1 if parsed.has_text_layer else 0,
                    parsed.extraction_status,
                    parsed.raw_text_excerpt,
                ),
            )

            cursor.execute(sql_doc_id, (pdf_path.name,))
            row = cursor.fetchone()
            if row is None:
                raise RuntimeError(f"Unable to resolve source document id for {pdf_path.name}")

            source_document_id = row[0]
            cursor.execute(
                sql_excursion,
                (
                    source_document_id,
                    parsed.title,
                    parsed.excursion_date,
                    parsed.start_time_text,
                    parsed.start_temperature_c,
                    parsed.car_distance_km,
                    parsed.min_altitude_m,
                    parsed.max_altitude_m,
                    parsed.summary,
                ),
            )
        # Single commit covers both statements so a parse that reaches this
        # point is persisted atomically (assuming a transactional engine).
        conn.commit()


def run() -> None:
    """Ingest every ``*.pdf`` in the configured input directory.

    Raises:
        FileNotFoundError: If the configured input directory does not exist.
    """
    source_dir = Path(settings.pdf_input_dir)
    if not source_dir.exists():
        raise FileNotFoundError(f"PDF input dir not found: {source_dir}")

    # Sort for a deterministic, reproducible processing order.
    documents = sorted(source_dir.glob("*.pdf"))
    print(f"Found {len(documents)} pdf files in {source_dir}")

    for document in documents:
        print(f"Processing: {document.name}")
        upsert_excursion_from_pdf(document)

    print("Ingestion complete.")


if __name__ == "__main__":
    # Allow running the ingestion directly as a standalone script.
    run()
