add experiment scripts and results; watcher.py latest changes

2026-04-30 18:06:03 +00:00
parent 1cf26df450
commit f11cacd9c9
55 changed files with 23594 additions and 726 deletions
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+"""
+Audit Expansion Pack Generator — type-aware stratified draw of 12
+documents from base_class_validation_results.json for n=20 audit expansion.
+
+Per audit-expansion-protocol.md amendment 2026-04-28:
+The seed=43 length-only random draw concentrated on course modules in the
+small and medium buckets, missing voice captures, syllabi, and
+conversational documents present in the candidate distribution.
+This script implements type-aware stratification within each length
+bucket to produce a sample representative of BirdAI's document-type mix.
+
+Targets (12 total):
+  small  (4): 2 course_module + 2 voice_capture
+  medium (4): 2 course_module + 1 syllabus + 1 other
+  large  (4): 1 course_ppt + 1 syllabus + 1 faculty_report + 1 conversational
+
+Output: ~/aaronai/experiments/audit_expansion_pack.json
+
+Usage:
+    python3 ~/aaronai/scripts/audit_expansion_draw.py
+    python3 ~/aaronai/scripts/audit_expansion_draw.py --dry-run
+"""
+import argparse
+import json
+import random
+import re
+import sys
+import time
+from pathlib import Path
+
+# Directory holding the experiment inputs and outputs (under the user's home).
+EXPERIMENTS = Path.home() / "aaronai" / "experiments"
+# Input: validation results; main() reads its "results" list as the candidate pool.
+VALIDATION_RESULTS = EXPERIMENTS / "base_class_validation_results.json"
+# Optional input: an earlier audit pack; sources found in it are excluded from the draw.
+EXISTING_AUDIT_PACK = EXPERIMENTS / "base_class_audit_pack.json"
+# Output: written by main() unless --dry-run is given.
+OUTPUT_FILE = EXPERIMENTS / "audit_expansion_pack.json"
+
+# Passed to random.seed() before the stratified draw and recorded in the output metadata.
+SEED = 43
+
+# Type-aware targets per bucket: size bucket -> {document type: number to draw}.
+# Counts sum to 4 per bucket (12 in total). main() drops any document whose
+# size_bucket is not a key of this mapping.
+TYPE_TARGETS = {
+    "small":  {"course_module": 2, "voice_capture": 2},
+    "medium": {"course_module": 2, "syllabus": 1, "other": 1},
+    "large":  {"course_ppt": 1, "syllabus": 1, "faculty_report": 1, "conversational": 1},
+}
+
+
+def classify(source, bucket):
+    """Map a source filename to a document type.
+
+    Rules are tried in order and the first match wins:
+
+    1. voice_capture  - name matches YYYY-MM-DD-HH-MM-voice.md (case-sensitive)
+    2. conversational - name starts with "Claude:" or "ChatGPT:" (case-sensitive)
+    3. syllabus       - "syllabus" appears anywhere in the name (any case)
+    4. faculty_report - "faculty report" or "annual report" appears (any case)
+    5. course_ppt     - only when bucket == "large": name contains ".pptx" or
+                        "_ppt_", or starts with "mod<digits>_" (any case)
+    6. course_module  - name starts with two digits and an underscore
+    7. other          - everything else
+
+    Args:
+        source: Source name of the document. A missing (None), non-string, or
+            empty value is classified as "other" rather than raising.
+        bucket: Size bucket of the document; only consulted by the course_ppt
+            rule, which is restricted to the "large" bucket.
+
+    Returns:
+        One of "voice_capture", "conversational", "syllabus",
+        "faculty_report", "course_ppt", "course_module", "other".
+    """
+    # Records without a usable source cannot be pattern-matched; calling
+    # .lower() on None would raise AttributeError, so classify them as 'other'.
+    if not isinstance(source, str) or not source:
+        return "other"
+
+    s = source.lower()
+
+    # Voice captures — pattern: YYYY-MM-DD-HH-MM-voice.md
+    if re.match(r"\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-voice\.md$", source):
+        return "voice_capture"
+
+    # Conversational exports — pattern: "Claude: ..." or "ChatGPT: ..."
+    if source.startswith(("Claude:", "ChatGPT:")):
+        return "conversational"
+
+    # Syllabus — must contain "syllabus" in the name
+    if "syllabus" in s:
+        return "syllabus"
+
+    # Faculty / annual reports
+    if "faculty report" in s or "annual report" in s:
+        return "faculty_report"
+
+    # Course PPTs (large bucket only) — any ".pptx" or "_ppt_" in the name,
+    # or a "modN_" prefix. Checked before the numeric-prefix rule, so a large
+    # "03_x.pptx" counts as course_ppt, not course_module.
+    if bucket == "large" and (".pptx" in s or "_ppt_" in s or re.match(r"mod\d+_", s)):
+        return "course_ppt"
+
+    # Course modules — two-digit numeric prefix such as "03_". The rule itself
+    # is not bucket-scoped; only TYPE_TARGETS limits it to small/medium draws.
+    if re.match(r"^\d{2}_", source):
+        return "course_module"
+
+    # Everything else falls into 'other' for medium; not used in small/large targets
+    return "other"
+
+
+def _load_excluded_sources(path):
+    """Return the set of 'source' values recorded in the audit pack at *path*.
+
+    The pack may be a dict holding its records under 'pairs' (preferred) or
+    'results', or a bare top-level list of records. Entries that are not
+    dicts, or that have no truthy 'source', are ignored. If no record list
+    can be found, a warning is printed to stderr and an empty set returned.
+    """
+    with open(path, encoding="utf-8") as f:
+        existing = json.load(f)
+
+    if isinstance(existing, dict):
+        records = existing.get("pairs", existing.get("results"))
+    else:
+        records = existing
+
+    if not isinstance(records, list):
+        print(f"WARNING: no 'pairs' or 'results' list in {path}; "
+              f"no sources excluded", file=sys.stderr)
+        return set()
+
+    return {
+        rec["source"]
+        for rec in records
+        if isinstance(rec, dict) and rec.get("source")
+    }
+
+
+def main():
+    """Draw the type-stratified audit expansion sample and write it to disk.
+
+    Steps:
+      1. Load VALIDATION_RESULTS (exits with status 1 if the file is missing).
+      2. Collect sources already present in EXISTING_AUDIT_PACK, if it exists.
+      3. Keep documents that are not excluded, have a string 'source', have
+         truthy 'condition_a' and 'condition_b', and whose 'size_bucket' is a
+         key of TYPE_TARGETS; tag each with '_type' from classify().
+      4. For every (bucket, type) target, sample without replacement using
+         random seeded with SEED. A shortfall draws what is available and
+         records a warning instead of failing.
+      5. Print a report; unless --dry-run is set, write OUTPUT_FILE.
+
+    The drawn records are written as loaded, plus the added '_type' key.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dry-run", action="store_true")
+    args = parser.parse_args()
+
+    if not VALIDATION_RESULTS.exists():
+        print(f"ERROR: {VALIDATION_RESULTS} not found", file=sys.stderr)
+        sys.exit(1)
+    with open(VALIDATION_RESULTS, encoding="utf-8") as f:
+        validation = json.load(f)
+
+    all_docs = validation["results"]
+    print(f"Loaded {len(all_docs)} documents from validation results")
+    print(f"Experiment: {validation.get('title', 'unknown')}")
+
+    # Load existing audit pack to exclude its sources (audit pack uses 'pairs')
+    excluded_sources = set()
+    if EXISTING_AUDIT_PACK.exists():
+        excluded_sources = _load_excluded_sources(EXISTING_AUDIT_PACK)
+        print(f"Excluding {len(excluded_sources)} sources already in audit pack")
+
+    # Filter to valid candidates
+    valid_docs = []
+    missing_source = 0
+    for doc in all_docs:
+        src = doc.get("source")
+        # A record without a string source can neither be classified by
+        # filename nor checked against the audit pack, so it is skipped.
+        if not isinstance(src, str):
+            missing_source += 1
+            continue
+        if src in excluded_sources:
+            continue
+        if not doc.get("condition_a") or not doc.get("condition_b"):
+            continue
+        bucket = doc.get("size_bucket")
+        if bucket not in TYPE_TARGETS:
+            continue
+        doc["_type"] = classify(src, bucket)
+        valid_docs.append(doc)
+
+    if missing_source:
+        print(f"WARNING: skipped {missing_source} records with no source",
+              file=sys.stderr)
+    print(f"Valid candidate documents: {len(valid_docs)}")
+
+    # Print what's available per (bucket, type) before drawing
+    print("\nCandidates by (bucket, type):")
+    for bucket in TYPE_TARGETS:
+        bucket_docs = [d for d in valid_docs if d["size_bucket"] == bucket]
+        types_in_bucket = {}
+        for d in bucket_docs:
+            types_in_bucket.setdefault(d["_type"], []).append(d)
+        print(f"  {bucket}:")
+        for t in sorted(types_in_bucket):
+            target = TYPE_TARGETS[bucket].get(t, "—")
+            print(f"    {t:>16}: {len(types_in_bucket[t])} avail, target {target}")
+
+    # Stratified type-aware draw. Seeding once, then iterating TYPE_TARGETS in
+    # declaration order, keeps the draw reproducible for a given input file.
+    random.seed(SEED)
+    drawn = []
+    warnings = []
+    for bucket, type_targets in TYPE_TARGETS.items():
+        bucket_docs = [d for d in valid_docs if d["size_bucket"] == bucket]
+        for doc_type, target in type_targets.items():
+            type_docs = [d for d in bucket_docs if d["_type"] == doc_type]
+            if len(type_docs) < target:
+                msg = (f"WARNING: bucket={bucket} type={doc_type} "
+                       f"available={len(type_docs)} target={target}")
+                warnings.append(msg)
+                print(msg, file=sys.stderr)
+            # Never ask for more than exists; random.sample would raise.
+            n_to_draw = min(target, len(type_docs))
+            drawn.extend(random.sample(type_docs, n_to_draw))
+
+    # Report draw
+    print(f"\nDrew {len(drawn)} documents:")
+    for d in drawn:
+        src = d.get("source", "<unknown>")
+        chars = d.get("doc_chars_original", 0)
+        bucket = d.get("size_bucket", "?")
+        doc_type = d.get("_type", "?")
+        truncated = " (TRUNCATED)" if d.get("truncated") else ""
+        print(f"  [{bucket:>6}/{doc_type:>16}] {chars:>6}c  {src}{truncated}")
+
+    # Bucket-level summary; keys come from TYPE_TARGETS so the two cannot drift.
+    bucket_counts = {bucket: 0 for bucket in TYPE_TARGETS}
+    for d in drawn:
+        bucket_counts[d["size_bucket"]] += 1
+    print(f"\nBucket totals: {bucket_counts}")
+
+    if args.dry_run:
+        print("\n--dry-run set, not writing output file")
+        return
+
+    output = {
+        "metadata": {
+            "generated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
+            "source_validation_file": str(VALIDATION_RESULTS),
+            "seed": SEED,
+            "stratification": "type-aware within length bucket",
+            "type_targets": TYPE_TARGETS,
+            "bucket_counts": bucket_counts,
+            "excluded_count": len(excluded_sources),
+            "warnings": warnings,
+            "purpose": "n=20 audit expansion per audit-expansion-protocol.md (type-aware amendment)",
+        },
+        "results": drawn,
+    }
+    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
+        json.dump(output, f, indent=2, default=str)
+    print(f"\nWrote {OUTPUT_FILE}")
+    print(f"  {len(drawn)} documents ready for rating")
+
+
+if __name__ == "__main__":
+    main()