"""End-to-end Scalable workflow that downscales GCAM scenarios with Demeter.

This is the canonical reference workflow used by the v2.0.0 tutorial series.
It pairs with :doc:`scalable.demeter.yaml <scalable.demeter.yaml>` and is
runnable end-to-end on a laptop.

Pipeline
--------

1. ``prepare_demeter_config`` — generate one ``.ini`` config per GCAM
   scenario from a shared base template (mirrors
   ``capabilities/demeter/processing_scripts/demeter_processing_batch.py``).
2. ``run_demeter_scenario`` — invoke ``demeter.run_model(config_file=...)``
   for each scenario. This is the heavy stage that benefits from horizontal
   scaling.
3. ``aggregate_demeter_outputs`` — collect the per-scenario results and
   write them to a single JSON summary file (``scenarios.json``).

Usage
-----

After completing the one-time setup in
:ref:`tutorial_demeter_setup` (clone, install, ``demeter.get_package_data``,
optionally ``docker build -t demeter:local capabilities/demeter -f
capabilities/demeter/Dockerfile.scalable``), run::

    scalable run docs/examples/scalable.demeter.yaml \
        --target local \
        --workflow docs/examples/workflow_demeter.py

or invoke ``main()`` directly from Python.

The workflow assumes:

* Demeter is importable in the worker environment
  (``pip install -e capabilities/demeter`` from the project root).
* Example data has been downloaded via
  ``demeter.get_package_data("./demeter_data")``.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from scalable import ScalableSession, cacheable

# ---------------------------------------------------------------------------
# Configuration knobs — overridden via CLI flags or environment variables in
# the tutorials. Keep these readable; tutorials cite them by line.
# ---------------------------------------------------------------------------

# Relative path: it is interpreted against the current working directory of
# the process that uses it, not against this file's location.
DEMETER_DATA_DIR = Path("./demeter_data")
"""Where ``demeter.get_package_data(...)`` extracted the example inputs."""


def _default_base_config() -> Path:
    """Pick the base ``.ini`` to use from the Demeter data directory.

    Lookup order, first hit wins:

    1. ``DEMETER_DATA_DIR/config_gcam_reference.ini`` (the v2.0.x layout).
    2. ``DEMETER_DATA_DIR/example/config_gcam_reference/config.ini`` (the
       legacy layout).
    3. The lowest-sorting ``*.ini`` found anywhere under
       ``DEMETER_DATA_DIR``.

    Returns
    -------
    Path
        The first match from the list above. When nothing matches, the
        v2.0.x path is returned even though it does not exist, so tutorial
        code blocks still have a concrete value to show.
    """

    preferred = DEMETER_DATA_DIR / "config_gcam_reference.ini"
    legacy = DEMETER_DATA_DIR / "example" / "config_gcam_reference" / "config.ini"

    # Well-known locations take priority over a recursive search.
    known = next((path for path in (preferred, legacy) if path.exists()), None)
    if known is not None:
        return known

    # Otherwise fall back to the smallest path (in sort order) of any .ini.
    first_found = min(DEMETER_DATA_DIR.rglob("*.ini"), default=None)
    if first_found is not None:
        return first_found

    return preferred  # placeholder for tutorial code blocks


# Computed once, at import time, by probing DEMETER_DATA_DIR on disk.
BASE_CONFIG = _default_base_config()
"""Base ``.ini`` template shipped inside the Zenodo bundle."""

# Relative path; ``prepare_demeter_config`` resolves its ``output_dir``
# argument (this value by default) to an absolute path before using it.
OUTPUT_DIR = Path("./outputs/demeter")
"""Per-scenario outputs live under ``OUTPUT_DIR/<scenario>/``."""


# ---------------------------------------------------------------------------
# Stage 1 — preprocess: generate a per-scenario config file.
# ---------------------------------------------------------------------------


@cacheable(return_type=str, scenario=str, base_config=str, output_dir=str)
def prepare_demeter_config(
    scenario: str,
    base_config: str = str(BASE_CONFIG),
    output_dir: str = str(OUTPUT_DIR),
) -> str:
    """Write a scenario-specific copy of the base Demeter ``.ini``.

    The base file is loaded with ``ConfigObj``, four entries are overridden
    (``STRUCTURE.run_dir``, ``STRUCTURE.out_dir``, ``PARAMS.scenario`` and
    ``PARAMS.run_desc``), and the result is saved as
    ``<output_dir>/<scenario>/demeter_<scenario>.ini``.

    Parameters
    ----------
    scenario:
        Scenario identifier (e.g. ``"reference"``, ``"ssp1"``). It is used
        both as a config value and as a directory / file name component.
    base_config:
        Path to the shared base ``.ini`` that is cloned.
    output_dir:
        Output root. It is resolved to an absolute path; the scenario
        directory is created beneath it if missing.

    Returns
    -------
    str
        Path to the generated scenario-specific ``.ini``.

    Raises
    ------
    FileNotFoundError
        If ``base_config`` does not exist.
    """

    from configobj import ConfigObj  # demeter dependency

    base = Path(base_config)
    if not base.exists():  # pragma: no cover - environmental guard
        raise FileNotFoundError(
            f"Demeter base config not found at {base}. "
            "Run demeter.get_package_data('./demeter_data') first; "
            "see docs/tutorials/demeter_setup.rst."
        )

    # Absolute output root, plus the per-scenario folder that holds the .ini.
    out_root = Path(output_dir).resolve()
    scenario_dir = out_root / scenario
    scenario_dir.mkdir(parents=True, exist_ok=True)

    # The directory containing the base config doubles as Demeter's run_dir.
    data_root = base.parent.resolve()

    config = ConfigObj(str(base))

    # Demeter resolves all input paths relative to STRUCTURE.run_dir; the
    # bundled config ships with a Windows-style placeholder, so it has to be
    # replaced before the model can run.
    structure = config["STRUCTURE"]
    structure["run_dir"] = str(data_root) + "/"
    # out_dir points at the output root, not at the per-scenario folder.
    structure["out_dir"] = str(out_root)

    params = config["PARAMS"]
    params["scenario"] = scenario
    params["run_desc"] = scenario

    # Point ConfigObj at the new location so write() saves there.
    target_ini = scenario_dir / f"demeter_{scenario}.ini"
    config.filename = str(target_ini)
    config.write()

    return str(target_ini)


# ---------------------------------------------------------------------------
# Stage 2 — run_demeter_scenario: invoke demeter.run_model for one scenario.
# ---------------------------------------------------------------------------


@cacheable(return_type=dict, config_file=str)
def run_demeter_scenario(config_file: str) -> dict[str, Any]:
    """Run Demeter end-to-end for a single GCAM scenario.

    Wraps :func:`demeter.run_model`. The function is ``@cacheable`` so reruns
    against unchanged inputs are skipped — invaluable when one scenario in a
    100-scenario fleet fails and only a subset needs to be re-executed.

    Parameters
    ----------
    config_file:
        Path to a scenario ``.ini``, normally the value returned by
        ``prepare_demeter_config`` (named ``demeter_<scenario>.ini``).

    Returns
    -------
    dict
        ``"scenario"`` — the config file's stem with a leading ``demeter_``
        removed; ``"output_dir"`` — the directory containing the config
        file; ``"config_file"`` — the input path, unchanged.
    """

    import demeter  # late import: keeps cache key stable on workers

    demeter.run_model(config_file=config_file, write_outputs=True)

    config_path = Path(config_file)

    # Strip only the *leading* "demeter_" added by prepare_demeter_config.
    # A blanket str.replace would also eat later occurrences, turning a
    # scenario such as "my_demeter_run" into "my_run".
    prefix = "demeter_"
    stem = config_path.stem
    scenario = stem[len(prefix):] if stem.startswith(prefix) else stem

    # NOTE(review): this is the folder holding the .ini; prepare_demeter_config
    # sets STRUCTURE.out_dir to the output root instead — confirm where
    # Demeter actually writes its results.
    output_dir = config_path.parent

    return {
        "scenario": scenario,
        "output_dir": str(output_dir),
        "config_file": config_file,
    }


# ---------------------------------------------------------------------------
# Stage 3 — postprocess: aggregate per-scenario outputs.
# ---------------------------------------------------------------------------


@cacheable(return_type=dict, scenario_results=list)
def aggregate_demeter_outputs(scenario_results: list[dict[str, Any]]) -> dict[str, Any]:
    """Write all per-scenario results to one JSON summary file.

    The file is ``OUTPUT_DIR/_summary/scenarios.json``; the ``_summary``
    directory is created when missing.

    Parameters
    ----------
    scenario_results:
        Per-scenario result dictionaries, stored verbatim under the
        ``"scenarios"`` key. They must be JSON-serializable.

    Returns
    -------
    dict
        ``"summary_path"`` (path of the written file) and
        ``"scenario_count"`` (number of entries in ``scenario_results``).
    """

    # Make sure the destination folder exists before anything is serialized.
    destination = Path(OUTPUT_DIR) / "_summary"
    destination.mkdir(parents=True, exist_ok=True)
    summary_file = destination / "scenarios.json"

    document = json.dumps(
        {
            "scenario_count": len(scenario_results),
            "scenarios": scenario_results,
        },
        indent=2,
    )
    summary_file.write_text(document)

    return {
        "summary_path": str(summary_file),
        "scenario_count": len(scenario_results),
    }


# ---------------------------------------------------------------------------
# Driver — used by `scalable run` and tutorial code blocks alike.
# ---------------------------------------------------------------------------


def main(scenarios: list[str] | None = None, target: str = "local") -> dict[str, Any]:
    """Drive the three-stage Demeter pipeline and return its summary.

    A session is built from ``scalable.demeter.yaml`` (looked up next to
    this file), the three stages are submitted in order, and the session is
    closed afterwards whether or not a stage raised.

    Parameters
    ----------
    scenarios:
        Scenario identifiers to process; ``None`` selects ``["reference"]``.
    target:
        Target name passed to ``ScalableSession.from_yaml``.

    Returns
    -------
    dict
        The result of the ``aggregate_demeter_outputs`` task.

    Notes
    -----
    Tutorial 03 cites the ``client.submit(... tag="demeter")`` lines below
    when discussing scenario fan-out; do not reorder without updating
    :ref:`tutorial_scaling_strategies`.
    """

    scenario_ids = ["reference"] if scenarios is None else scenarios

    manifest = Path(__file__).resolve().parent / "scalable.demeter.yaml"
    session = ScalableSession.from_yaml(str(manifest), target=target)
    client = session.start()
    try:
        # Stage 1: one lightweight config-generation task per scenario.
        pending_configs = [
            client.submit(prepare_demeter_config, scenario_id, tag="preprocess")
            for scenario_id in scenario_ids
        ]
        ini_paths = client.gather(pending_configs)

        # Stage 2: one heavy Demeter run per generated config.
        pending_runs = [
            client.submit(run_demeter_scenario, ini_path, tag="demeter")
            for ini_path in ini_paths
        ]
        run_results = client.gather(pending_runs)

        # Stage 3: a single task folds every result into the summary.
        pending_summary = client.submit(
            aggregate_demeter_outputs, run_results, tag="postprocess"
        )
        summary = pending_summary.result()
    finally:
        session.close()

    return summary


if __name__ == "__main__":  # pragma: no cover - script entry point
    # Command-line entry point: parse the options, run the pipeline, and
    # print the returned summary as indented JSON.
    import argparse

    cli = argparse.ArgumentParser(description="Run Demeter for one or more scenarios.")
    cli.add_argument(
        "--scenarios",
        default=["reference"],
        nargs="+",
        help="GCAM scenario identifiers to downscale (default: reference)",
    )
    cli.add_argument("--target", default="local", help="Manifest target name (default: local)")

    options = cli.parse_args()
    summary = main(scenarios=options.scenarios, target=options.target)
    print(json.dumps(summary, indent=2))