add experiment scripts and results; watcher.py latest changes

This commit is contained in:
2026-04-30 18:06:03 +00:00
parent 1cf26df450
commit f11cacd9c9
55 changed files with 23594 additions and 726 deletions
+193
View File
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
"""
Audit Expansion Pack Generator — type-aware stratified draw of 12
documents from base_class_validation_results.json for n=20 audit expansion.

Per audit-expansion-protocol.md amendment 2026-04-28:
The seed=43 length-only random draw concentrated on course modules in the
small and medium buckets, missing voice captures, syllabi, and
conversational documents present in the candidate distribution.
This script implements type-aware stratification within each length
bucket to produce a sample representative of BirdAI's document-type mix.

Sources already listed in base_class_audit_pack.json are excluded from
the draw.

Targets (12 total):
    small  (4): 2 course_module + 2 voice_capture
    medium (4): 2 course_module + 1 syllabus + 1 other
    large  (4): 1 course_ppt + 1 syllabus + 1 faculty_report + 1 conversational

Output: ~/aaronai/experiments/audit_expansion_pack.json

Usage:
    python3 ~/aaronai/scripts/audit_expansion_draw.py
    python3 ~/aaronai/scripts/audit_expansion_draw.py --dry-run

With --dry-run the draw is printed but the output file is not written.
"""
import argparse
import json
import random
import re
import sys
import time
from pathlib import Path
# Every input and output of this script lives in one directory under $HOME.
EXPERIMENTS = Path.home().joinpath("aaronai", "experiments")
VALIDATION_RESULTS = EXPERIMENTS.joinpath("base_class_validation_results.json")
EXISTING_AUDIT_PACK = EXPERIMENTS.joinpath("base_class_audit_pack.json")
OUTPUT_FILE = EXPERIMENTS.joinpath("audit_expansion_pack.json")

# Fixed seed so the draw is reproducible.
SEED = 43

# Quota of documents to draw per document type, within each length bucket.
# Insertion order is significant: the draw walks buckets and types in this
# order, and each draw advances the seeded RNG.
TYPE_TARGETS = {
    "small": dict(course_module=2, voice_capture=2),
    "medium": dict(course_module=2, syllabus=1, other=1),
    "large": dict(course_ppt=1, syllabus=1, faculty_report=1, conversational=1),
}
def classify(source, bucket):
    """Map a source filename to a document-type label.

    Rules are tried in the order below and the first match wins:

    1. ``voice_capture``  - name is exactly ``YYYY-MM-DD-HH-MM-voice.md``.
    2. ``conversational`` - name starts with ``Claude:`` or ``ChatGPT:``.
    3. ``syllabus``       - name contains "syllabus" (case-insensitive).
    4. ``faculty_report`` - name contains "faculty report" or
       "annual report" (case-insensitive).
    5. ``course_ppt``     - only when ``bucket == "large"``: name contains
       ".pptx" or "_ppt_", or starts with ``mod<digits>_``
       (all case-insensitive).
    6. ``course_module``  - name starts with two digits and an underscore
       (checked in every bucket).
    7. ``other``          - anything else.

    Args:
        source: Source filename. A missing (``None``) or non-string value
            cannot match any pattern and is classified as ``"other"``
            instead of raising.
        bucket: Size bucket name; only consulted by the ``course_ppt`` rule.

    Returns:
        One of the type labels listed above.
    """
    # Records without a usable filename fall through to the catch-all type
    # rather than crashing on ``.lower()``.
    if not isinstance(source, str):
        return "other"

    lowered = source.lower()

    # Voice captures - pattern: YYYY-MM-DD-HH-MM-voice.md (case-sensitive).
    if re.match(r"\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-voice\.md$", source):
        return "voice_capture"

    # Conversational exports - pattern: "Claude: ..." or "ChatGPT: ...".
    if source.startswith(("Claude:", "ChatGPT:")):
        return "conversational"

    # Syllabus - must contain "syllabus" in the name.
    if "syllabus" in lowered:
        return "syllabus"

    # Faculty / annual reports.
    if "faculty report" in lowered or "annual report" in lowered:
        return "faculty_report"

    # Course PPTs exist as a category only in the large bucket.
    if bucket == "large" and (
        ".pptx" in lowered
        or "_ppt_" in lowered
        or re.match(r"mod\d+_", lowered)
    ):
        return "course_ppt"

    # Course modules - two-digit numeric prefix, e.g. "01_intro.docx".
    if re.match(r"^\d{2}_", source):
        return "course_module"

    # Everything else is 'other' (only a draw target in the medium bucket).
    return "other"
def _load_excluded_sources(pack_path):
    """Return the set of ``source`` values listed in an existing audit pack.

    Accepts either a JSON object holding the entries under ``"pairs"`` (or,
    failing that, ``"results"``) or a bare top-level JSON list of entries.
    Entries that are not objects or have no truthy ``"source"`` are ignored.
    Exits with status 1 if no entry list can be found, because silently
    excluding nothing would allow already-audited documents to be re-drawn.
    """
    with open(pack_path, encoding="utf-8") as f:
        existing = json.load(f)
    if isinstance(existing, dict):
        entries = existing.get("pairs", existing.get("results"))
    else:
        entries = existing
    if not isinstance(entries, list):
        print(f"ERROR: {pack_path} has no 'pairs' or 'results' list",
              file=sys.stderr)
        sys.exit(1)
    return {
        entry["source"]
        for entry in entries
        if isinstance(entry, dict) and entry.get("source")
    }


def _stratified_draw(candidates, type_targets, seed):
    """Draw up to the target number of documents per (bucket, type).

    Returns ``(drawn, warnings)``. A warning is recorded, and echoed to
    stderr, for every (bucket, type) with fewer candidates than its target;
    in that case all available candidates of that type are taken.
    """
    # A private generator seeded with `seed` yields the same sequence as
    # random.seed(seed) on the module-level generator, without touching
    # global RNG state.
    rng = random.Random(seed)
    drawn = []
    warnings = []
    for bucket, targets in type_targets.items():
        bucket_docs = [d for d in candidates if d["size_bucket"] == bucket]
        for doc_type, target in targets.items():
            pool = [d for d in bucket_docs if d["_type"] == doc_type]
            if len(pool) < target:
                msg = (f"WARNING: bucket={bucket} type={doc_type} "
                       f"available={len(pool)} target={target}")
                warnings.append(msg)
                print(msg, file=sys.stderr)
            drawn.extend(rng.sample(pool, min(target, len(pool))))
    return drawn, warnings


def main():
    """Run the type-aware stratified draw and write the expansion pack.

    Steps:
      1. Load the validation results; exit with status 1 if the file is
         missing.
      2. Collect sources already present in the existing audit pack (if
         that file exists) so they are not drawn again.
      3. Keep documents that are not excluded, have both ``condition_a``
         and ``condition_b``, and whose ``size_bucket`` is a key of
         TYPE_TARGETS; tag each with ``_type`` from ``classify``.
      4. Print availability per (bucket, type), then draw with SEED.
      5. Print the draw and, unless ``--dry-run`` is given, write
         OUTPUT_FILE.

    Each kept document dict is mutated in place (the ``_type`` key is
    added), so ``_type`` also appears in the written results.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    if not VALIDATION_RESULTS.exists():
        print(f"ERROR: {VALIDATION_RESULTS} not found", file=sys.stderr)
        sys.exit(1)
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(VALIDATION_RESULTS, encoding="utf-8") as f:
        validation = json.load(f)
    all_docs = validation["results"]
    print(f"Loaded {len(all_docs)} documents from validation results")
    print(f"Experiment: {validation.get('title', 'unknown')}")

    # Load existing audit pack to exclude its sources.
    excluded_sources = set()
    if EXISTING_AUDIT_PACK.exists():
        excluded_sources = _load_excluded_sources(EXISTING_AUDIT_PACK)
        print(f"Excluding {len(excluded_sources)} sources already in audit pack")

    # Filter to valid candidates.
    valid_docs = []
    for doc in all_docs:
        src = doc.get("source")
        if src in excluded_sources:
            continue
        if not doc.get("condition_a") or not doc.get("condition_b"):
            continue
        bucket = doc.get("size_bucket")
        if bucket not in TYPE_TARGETS:
            continue
        # A record without a string source matches no filename pattern;
        # classify it via the empty name (-> "other") instead of crashing.
        doc["_type"] = classify(src if isinstance(src, str) else "", bucket)
        valid_docs.append(doc)
    print(f"Valid candidate documents: {len(valid_docs)}")

    # Print what's available per (bucket, type) before drawing.
    print("\nCandidates by (bucket, type):")
    for bucket in TYPE_TARGETS:
        type_counts = {}
        for d in valid_docs:
            if d["size_bucket"] == bucket:
                type_counts[d["_type"]] = type_counts.get(d["_type"], 0) + 1
        print(f" {bucket}:")
        for t in sorted(type_counts):
            target = TYPE_TARGETS[bucket].get(t, "")
            print(f" {t:>16}: {type_counts[t]} avail, target {target}")

    # Stratified type-aware draw.
    drawn, warnings = _stratified_draw(valid_docs, TYPE_TARGETS, SEED)

    # Report draw.
    print(f"\nDrew {len(drawn)} documents:")
    for d in drawn:
        src = d.get("source", "<unknown>")
        chars = d.get("doc_chars_original", 0)
        bucket = d.get("size_bucket", "?")
        doc_type = d.get("_type", "?")
        truncated = " (TRUNCATED)" if d.get("truncated") else ""
        print(f" [{bucket:>6}/{doc_type:>16}] {chars:>6}c {src}{truncated}")

    # Bucket-level summary; keys come from TYPE_TARGETS so the two cannot
    # drift apart.
    bucket_counts = {bucket: 0 for bucket in TYPE_TARGETS}
    for d in drawn:
        bucket_counts[d["size_bucket"]] += 1
    print(f"\nBucket totals: {bucket_counts}")

    if args.dry_run:
        print("\n--dry-run set, not writing output file")
        return

    output = {
        "metadata": {
            "generated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "source_validation_file": str(VALIDATION_RESULTS),
            "seed": SEED,
            "stratification": "type-aware within length bucket",
            "type_targets": TYPE_TARGETS,
            "bucket_counts": bucket_counts,
            "excluded_count": len(excluded_sources),
            "warnings": warnings,
            "purpose": "n=20 audit expansion per audit-expansion-protocol.md (type-aware amendment)",
        },
        "results": drawn,
    }
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, default=str)
    print(f"\nWrote {OUTPUT_FILE}")
    print(f" {len(drawn)} documents ready for rating")
# Run the draw only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()