import json
from pathlib import Path

from .config import settings
from .services.pdf_parser import parse_pdf


def _document_entry(pdf, parsed) -> dict:
    """Build the report entry for one PDF from its ``parse_pdf`` result.

    Args:
        pdf: Path of the PDF file; only its ``name`` is recorded.
        parsed: The object returned by ``parse_pdf(pdf)``.

    Returns:
        A JSON-serializable dict of the profiled fields for this document.
    """
    excursion_date = parsed.excursion_date
    excerpt = parsed.raw_text_excerpt
    return {
        "filename": pdf.name,
        "title_from_filename": parsed.title,
        "date_from_filename": excursion_date.isoformat() if excursion_date else None,
        "start_time_text": parsed.start_time_text,
        "start_temperature_c": parsed.start_temperature_c,
        "car_distance_km": parsed.car_distance_km,
        "min_altitude_m": parsed.min_altitude_m,
        "max_altitude_m": parsed.max_altitude_m,
        "page_count_estimated": parsed.page_count_estimated,
        "has_text_layer": parsed.has_text_layer,
        "extraction_status": parsed.extraction_status,
        # Keep the report compact: at most 500 characters of text.
        # NOTE(review): parse_pdf's contract is not visible here; a missing
        # excerpt is passed through as null instead of aborting the report.
        "raw_text_excerpt": excerpt[:500] if excerpt is not None else None,
    }


def run() -> None:
    """Profile every PDF in the configured input directory and write a report.

    Runs ``parse_pdf`` on each ``*.pdf`` file directly inside
    ``settings.pdf_input_dir`` (non-recursive, in sorted path order) and
    writes the per-document fields plus per-status counts as JSON to
    ``storage/private/pdf_profile_report.json``, resolved against the
    current working directory.

    Raises:
        FileNotFoundError: If the configured input path is not an existing
            directory.
    """
    input_dir = Path(settings.pdf_input_dir)
    # is_dir() also rejects a regular file at that path, which would
    # otherwise glob to nothing and silently produce an empty report.
    if not input_dir.is_dir():
        raise FileNotFoundError(f"PDF input dir not found: {input_dir}")

    pdf_files = sorted(input_dir.glob("*.pdf"))

    # Only these statuses are tallied; any other status is still recorded
    # on its document entry but does not contribute to the summary.
    status_counts = {"ocr_needed": 0, "metadata_only": 0, "parsed_text": 0}
    documents = []
    for pdf in pdf_files:
        parsed = parse_pdf(pdf)
        status = parsed.extraction_status
        if status in status_counts:
            status_counts[status] += 1
        documents.append(_document_entry(pdf, parsed))

    report = {
        "input_dir": str(input_dir),
        "pdf_count": len(pdf_files),
        "documents": documents,
        "summary": {
            "ocr_needed_count": status_counts["ocr_needed"],
            "metadata_only_count": status_counts["metadata_only"],
            "parsed_text_count": status_counts["parsed_text"],
            # Documents whose text was not extracted: OCR needed or metadata only.
            "pending_full_parse_count": (
                status_counts["ocr_needed"] + status_counts["metadata_only"]
            ),
        },
    }

    output_path = Path("storage/private/pdf_profile_report.json")
    # write_text() does not create missing directories, so make sure the
    # target directory exists before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"Profile report written to {output_path}")


# Script entry point: generate the profile report when this module is executed.
# The module uses package-relative imports, so it has to be started as a
# module of its package (``python -m <package>.<module>``), not as a bare file.
if __name__ == "__main__":
    run()
