Add helper stuff for figures, cleanup

This commit is contained in:
2026-02-20 01:56:28 +01:00
parent 28823dc0b5
commit 101bd81ca1
20 changed files with 1862 additions and 1164 deletions

View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""Aggregate review length counts into buckets."""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Dict, Iterable, Tuple
Bucket = Tuple[int | None, int | None, str]
DEFAULT_BUCKETS: Tuple[Bucket, ...] = (
(None, 9, "<10"),
(10, 19, "10-19"),
(20, 29, "20-29"),
(30, 39, "30-39"),
(40, 49, "40-49"),
(50, 59, "50-59"),
(60, 69, "60-69"),
(70, 79, "70-79"),
(80, 89, "80-89"),
(90, 99, "90-99"),
(100, 109, "100-109"),
(110, 119, "110-119"),
(120, 129, "120-129"),
(130, 139, "130-139"),
(140, 149, "140-149"),
(150, 159, "150-159"),
(160, 169, "160-169"),
(170, 179, "170-179"),
(180, 189, "180-189"),
(190, 199, "190-199"),
(200, 219, "200-219"),
(220, 239, "220-239"),
(240, 259, "240-259"),
(260, 279, "260-279"),
(280, 299, "280-299"),
(300, 399, "300-399"),
(400, 499, "400-499"),
(500, 999, "500-999"),
(1000, None, "1000+"),
)
def load_counts(path: Path) -> Dict[int, int]:
    """Load a ``length -> count`` mapping from a UTF-8 JSON file.

    JSON object keys are always strings, so every key and value is passed
    through ``int`` to obtain integer lengths and counts.

    Args:
        path: File containing a single JSON object.

    Returns:
        The decoded mapping with integer keys and integer values.

    Raises:
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``), if the top-level value is not an object,
            or if a key or value string cannot be parsed by ``int``.
    """
    with path.open("r", encoding="utf-8") as handle:
        raw = json.load(handle)
    # Without this check a list/number/string payload would only surface as
    # an opaque AttributeError on ``.items()``.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Expected a JSON object mapping length -> count in {path}, "
            f"got {type(raw).__name__}."
        )
    return {int(length): int(count) for length, count in raw.items()}
def aggregate(counts: Dict[int, int], buckets: Iterable[Bucket]) -> Dict[str, int]:
    """Sum per-length counts into labelled buckets.

    Args:
        counts: Mapping of length to count.
        buckets: ``(start, end, label)`` triples with inclusive bounds. A
            bound of ``None`` leaves that side open, so ``(None, None, label)``
            matches every length. Buckets are tried in the given order and
            the first match wins. Any iterable is accepted, including a
            one-shot iterator.

    Returns:
        Mapping of bucket label to the summed count, in bucket order. Every
        label is present, with ``0`` for buckets that received nothing.

    Raises:
        ValueError: If a length matches none of the buckets.
    """
    # Materialize once: the buckets are scanned for their labels and then
    # again for every length, which would exhaust a one-shot iterator after
    # the first pass and make every lookup fail.
    bucket_table = tuple(buckets)
    totals: Dict[str, int] = {label: 0 for _, _, label in bucket_table}
    for length, count in counts.items():
        for start, end, label in bucket_table:
            above_start = start is None or start <= length
            below_end = end is None or length <= end
            if above_start and below_end:
                totals[label] += count
                break
        else:
            # The inner loop finished without ``break``: nothing matched.
            raise ValueError(f"No bucket found for length {length}.")
    return totals
def write_output(path: Path, data: Dict[str, int]) -> None:
    """Write *data* to *path* as indented UTF-8 JSON followed by a newline.

    Args:
        path: Destination file; it is created or overwritten.
        data: Mapping to serialize. Non-ASCII characters are written as-is
            rather than ``\\u``-escaped.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize. The
            destination file is left untouched in that case.
    """
    # Serialize before opening: mode "w" truncates the target immediately, so
    # an encoding failure part-way through would otherwise leave an empty or
    # half-written file in place of any previous output.
    payload = json.dumps(data, indent=2, ensure_ascii=False)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(payload)
        handle.write("\n")
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: bucket a length-count file and write the result.

    Reads the input JSON with ``load_counts``, groups it with ``aggregate``
    using ``DEFAULT_BUCKETS``, and writes the result with ``write_output``.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            ``argparse`` read the process arguments, as before; passing a
            list allows the CLI to be driven programmatically.

    Returns:
        ``0`` on success. Argument errors and ``--help`` exit through
        ``SystemExit`` raised by ``argparse``.
    """
    parser = argparse.ArgumentParser(description="Bucket review length counts.")
    parser.add_argument(
        "input",
        type=Path,
        help="Path to review_lengths.json (mapping of length -> count).",
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Path to write bucketed counts JSON.",
    )
    args = parser.parse_args(argv)

    counts = load_counts(args.input)
    bucketed = aggregate(counts, DEFAULT_BUCKETS)
    write_output(args.output, bucketed)
    return 0
# Run the CLI only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)