#!/usr/bin/env python3 """Aggregate review length counts into buckets.""" from __future__ import annotations import argparse import json from pathlib import Path from typing import Dict, Iterable, Tuple Bucket = Tuple[int | None, int | None, str] DEFAULT_BUCKETS: Tuple[Bucket, ...] = ( (None, 9, "<10"), (10, 19, "10-19"), (20, 29, "20-29"), (30, 39, "30-39"), (40, 49, "40-49"), (50, 59, "50-59"), (60, 69, "60-69"), (70, 79, "70-79"), (80, 89, "80-89"), (90, 99, "90-99"), (100, 109, "100-109"), (110, 119, "110-119"), (120, 129, "120-129"), (130, 139, "130-139"), (140, 149, "140-149"), (150, 159, "150-159"), (160, 169, "160-169"), (170, 179, "170-179"), (180, 189, "180-189"), (190, 199, "190-199"), (200, 219, "200-219"), (220, 239, "220-239"), (240, 259, "240-259"), (260, 279, "260-279"), (280, 299, "280-299"), (300, 399, "300-399"), (400, 499, "400-499"), (500, 999, "500-999"), (1000, None, "1000+"), ) def load_counts(path: Path) -> Dict[int, int]: with path.open("r", encoding="utf-8") as handle: raw = json.load(handle) return {int(k): int(v) for k, v in raw.items()} def aggregate(counts: Dict[int, int], buckets: Iterable[Bucket]) -> Dict[str, int]: output: Dict[str, int] = {label: 0 for _, _, label in buckets} for length, count in counts.items(): for start, end, label in buckets: if start is None and end is not None and length <= end: output[label] += count break if end is None and start is not None and length >= start: output[label] += count break if start is not None and end is not None and start <= length <= end: output[label] += count break else: raise ValueError(f"No bucket found for length {length}.") return output def write_output(path: Path, data: Dict[str, int]) -> None: with path.open("w", encoding="utf-8") as handle: json.dump(data, handle, indent=2, ensure_ascii=False) handle.write("\n") def main() -> int: parser = argparse.ArgumentParser(description="Bucket review length counts.") parser.add_argument( "input", type=Path, help="Path to review_lengths.json (mapping of length -> count).", ) parser.add_argument( "output", type=Path, help="Path to write bucketed counts JSON.", ) args = parser.parse_args() counts = load_counts(args.input) bucketed = aggregate(counts, DEFAULT_BUCKETS) write_output(args.output, bucketed) return 0 if __name__ == "__main__": raise SystemExit(main())