#!/usr/bin/env python3
"""
Data Processing Pipeline
========================

A sample script that reads structured records, transforms them,
and writes summary statistics. Demonstrates dataclasses, type hints,
pathlib, argparse, and idiomatic Python patterns.

Usage:
    python python-sample-script.py --input data.json --output report.txt --top 5
"""

from __future__ import annotations

import argparse
import json
import sys
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from statistics import mean, median, stdev
from typing import Iterator


@dataclass(frozen=True)
class SalesRecord:
    """One immutable sales transaction line.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    transaction_id: str
    product: str
    category: str
    quantity: int
    unit_price: float
    timestamp: datetime

    @property
    def total(self) -> float:
        """Return ``quantity * unit_price`` rounded to two decimal places."""
        line_total = self.quantity * self.unit_price
        return round(line_total, 2)

    @classmethod
    def from_dict(cls, data: dict) -> SalesRecord:
        """Build a record from a raw mapping (e.g. a decoded JSON object).

        ``quantity`` is coerced with ``int``, ``unit_price`` with ``float``,
        and ``timestamp`` is parsed with ``datetime.fromisoformat``. The text
        fields are stored as given.

        Args:
            data: Mapping holding the keys ``transaction_id``, ``product``,
                ``category``, ``quantity``, ``unit_price`` and ``timestamp``.

        Returns:
            The newly constructed ``SalesRecord``.

        Raises:
            KeyError: If a required key is absent.
            ValueError: If a value cannot be parsed (e.g. a non-numeric
                quantity string or a non-ISO timestamp string).
            TypeError: If a value has a type the conversion rejects
                (e.g. ``None``).
        """
        # Keys are read in field order, so the first missing or invalid one
        # is the one reported to the caller.
        transaction_id = data["transaction_id"]
        product = data["product"]
        category = data["category"]
        quantity = int(data["quantity"])
        unit_price = float(data["unit_price"])
        timestamp = datetime.fromisoformat(data["timestamp"])
        return cls(
            transaction_id=transaction_id,
            product=product,
            category=category,
            quantity=quantity,
            unit_price=unit_price,
            timestamp=timestamp,
        )


@dataclass
class CategorySummary:
    """Running totals collected for one product category."""

    category: str
    count: int = 0
    revenue: float = 0.0
    quantities: list[int] = field(default_factory=list)

    @property
    def avg_quantity(self) -> float:
        """Mean of ``quantities`` rounded to one decimal, or 0.0 when empty."""
        if not self.quantities:
            return 0.0
        return round(mean(self.quantities), 1)

    @property
    def median_quantity(self) -> float:
        """Median of ``quantities`` rounded to one decimal, or 0.0 when empty."""
        if not self.quantities:
            return 0.0
        return round(median(self.quantities), 1)


def load_records(path: Path) -> Iterator[SalesRecord]:
    """Yield the valid sales records stored in a JSON file.

    The whole file is parsed up front with ``json.load``; only the conversion
    of each decoded entry into a ``SalesRecord`` happens lazily. Because this
    is a generator, the file is not opened until iteration starts.

    Entries that cannot be converted are reported on stderr and skipped, so
    one bad entry does not abort the run.

    Args:
        path: Location of the JSON file. The top-level value is iterated
            entry by entry (presumably an array of objects; confirm against
            the data producer).

    Yields:
        One ``SalesRecord`` per well-formed entry, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with path.open("r", encoding="utf-8") as fh:
        data = json.load(fh)
    for entry in data:
        try:
            record = SalesRecord.from_dict(entry)
        except (KeyError, TypeError, ValueError) as exc:
            # TypeError covers wrongly typed data: a null quantity, a
            # non-string timestamp, or an entry that is not an object at all.
            print(f"  Skipping malformed record: {exc}", file=sys.stderr)
        else:
            # Yield outside the try body so only the conversion is guarded.
            yield record


def summarize_by_category(records: Iterator[SalesRecord]) -> dict[str, CategorySummary]:
    """Group records by category and accumulate per-category statistics.

    Args:
        records: Sales records to aggregate; consumed exactly once.

    Returns:
        A dict keyed by category name, in order of first appearance. Each
        value holds the transaction count, the summed revenue (the sum of each
        record's ``total``) and the list of quantities seen for that category.
    """
    summaries: dict[str, CategorySummary] = {}
    for rec in records:
        summary = summaries.get(rec.category)
        if summary is None:
            # Create the summary only for a category's first record, rather
            # than building a throwaway default object on every iteration.
            summary = summaries[rec.category] = CategorySummary(category=rec.category)
        summary.count += 1
        summary.revenue += rec.total
        summary.quantities.append(rec.quantity)
    return summaries


def format_report(summaries: dict[str, CategorySummary], top_n: int) -> str:
    """Render a plain-text report of the top categories by revenue.

    Args:
        summaries: Per-category summaries keyed by category name.
        top_n: Maximum number of categories to list. Zero or a negative
            value lists none.

    Returns:
        The report as one newline-joined string with no trailing newline. It
        has a banner with the generation time (local clock), one section per
        listed category in descending revenue order, and an overall revenue
        line that sums ALL categories, not only the listed ones.
    """
    by_revenue = sorted(summaries.values(), key=lambda s: s.revenue, reverse=True)
    # Clamp at zero: a negative slice bound counts from the end and would
    # silently drop the lowest-revenue categories instead of limiting the list.
    ranked = by_revenue[: max(top_n, 0)]
    lines = [
        "=" * 60,
        "  Sales Summary Report",
        f"  Generated: {datetime.now():%Y-%m-%d %H:%M:%S}",
        "=" * 60,
        "",
    ]
    for rank, cat in enumerate(ranked, start=1):
        lines.append(f"  #{rank}  {cat.category}")
        lines.append(f"       Transactions : {cat.count}")
        lines.append(f"       Total Revenue: ${cat.revenue:,.2f}")
        lines.append(f"       Avg Quantity : {cat.avg_quantity}")
        lines.append(f"       Med Quantity : {cat.median_quantity}")
        lines.append("")
    all_rev = sum(s.revenue for s in summaries.values())
    lines.append(f"  Overall Revenue: ${all_rev:,.2f}")
    lines.append("=" * 60)
    return "\n".join(lines)


def _non_negative_int(value: str) -> int:
    """argparse ``type`` callback: parse *value* as an integer >= 0."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(
            f"must be a non-negative integer, got {number}"
        )
    return number


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Define and parse the command-line arguments.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        A namespace with ``input`` (``Path``, required), ``output``
        (``Path`` or ``None``, meaning stdout) and ``top`` (``int``,
        default 10, never negative).

    Raises:
        SystemExit: Raised by argparse on invalid usage, including a
            negative ``--top`` value, or after printing ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Process sales data and generate a summary report.",
    )
    parser.add_argument(
        "--input", "-i",
        type=Path,
        required=True,
        help="Path to the input JSON file containing sales records.",
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        default=None,
        help="Path to write the report. Defaults to stdout.",
    )
    parser.add_argument(
        "--top", "-n",
        # Reject negatives here: a negative count would otherwise reach list
        # slicing downstream, where it trims categories from the end.
        type=_non_negative_int,
        default=10,
        help="Number of top categories to include (default: 10).",
    )
    return parser.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Run the sales reporting pipeline and return a process exit code.

    Args:
        argv: Command-line arguments. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        0 on success. 1 when the input file is missing, cannot be read or
        decoded, is not valid JSON, or when the report cannot be written.
    """
    args = parse_args(argv)

    if not args.input.exists():
        print(f"Error: input file not found: {args.input}", file=sys.stderr)
        return 1

    try:
        # load_records is a generator, so open/parse failures only surface
        # while the records are being consumed by the aggregation step.
        summaries = summarize_by_category(load_records(args.input))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f"Error: could not read {args.input}: {exc}", file=sys.stderr)
        return 1

    report = format_report(summaries, top_n=args.top)

    if args.output is not None:
        try:
            args.output.write_text(report, encoding="utf-8")
        except OSError as exc:
            print(f"Error: could not write {args.output}: {exc}", file=sys.stderr)
            return 1
        print(f"Report written to {args.output}")
    else:
        print(report)

    return 0


# Run the CLI only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())