3f7fba7e0e
Moves 28 experiment scripts to scripts/experiments/ (E1, E1.4, E1.6, E2, base_class, cascade, cost_test, briefing, consistency, token series). Moves 2 dissolved-layer scripts to scripts/deprecated/ (consolidator_v0_1.py, tier1_migration.py — under the bespoke decision both target retired substrate work). Removes 19 .bak* files from disk (gitignored, never tracked; git history is the durable record of every prior version). The 11 production scripts remain in scripts/. All systemd ExecStart paths, api.py subprocess calls, and cron jobs continue to resolve correctly — verified by grep against /etc/systemd/system/aaronai-*.service, scripts/ references in api.py, and the user crontab. Track 1 inventory cross-cutting finding: scripts/ mixed 11 production files with 32 experimental scripts and ~20 .bak files. After this commit a clean-room reader can identify the live workers from a directory listing alone. Found by Track 1 inventory 2026-05-02. See ~/aaronai/docs/scripts-reorg-plan-2026-05-02.md for full reasoning. After commit, run: 1. git log --oneline -3 — show the new commit on top 2. git status — confirm clean working tree (modulo the docs/ untracked files which are intentional)
116 lines
4.6 KiB
Python
116 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""E1 corrected metric — count distinct predicate names on edges originating from each episode."""
|
|
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Directory under the current user's home that holds experiment files.
EXPERIMENTS = Path.home().joinpath("aaronai", "experiments")
# JSON sample file inside that directory; the script reads its "selected" list.
SAMPLE_FILE = EXPERIMENTS.joinpath("cascade_reextract_sample.json")
|
|
|
|
def query(group_id, cypher):
    """Run a Cypher query through ``redis-cli`` inside the ``falkordb`` container.

    Args:
        group_id: Graph key passed as the first argument of ``GRAPH.QUERY``.
        cypher: Query text. It is handed over as a single argv entry, so no
            shell quoting is involved.

    Returns:
        The raw text the command wrote to stdout.

    Raises:
        RuntimeError: If the ``docker exec`` command exits with a non-zero
            status. Without this check a failed command yields empty output,
            which the callers in this file would read as "no UUID" or a
            count of 0 and record as if it were a real measurement.
    """
    result = subprocess.run(
        ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", group_id, cypher],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"GRAPH.QUERY on {group_id!r} failed "
            f"(exit {result.returncode}): {result.stderr.strip()}"
        )
    return result.stdout
|
|
|
|
def get_episode_uuid(group_id, episode_name):
    """Look up the UUID of the ``Episodic`` node with the given name.

    Args:
        group_id: Graph key to query.
        episode_name: Value compared for equality against ``e.name``.

    Returns:
        The first line of the query output that is 36 characters long and
        contains exactly four hyphens, or ``None`` when no line matches.
    """
    # Escape backslashes BEFORE quotes. Escaping only quotes lets a backslash
    # in the name combine with the following character (or with the closing
    # quote when the name ends in a backslash) and corrupt the string literal.
    safe = episode_name.replace("\\", "\\\\").replace("'", "\\'")
    cypher = f"MATCH (e:Episodic) WHERE e.name = '{safe}' RETURN e.uuid LIMIT 1"
    output = query(group_id, cypher)
    for raw_line in output.split("\n"):
        candidate = raw_line.strip()
        # UUID format check: length and hyphen count only, so header and
        # statistics lines in the output are skipped.
        if len(candidate) == 36 and candidate.count("-") == 4:
            return candidate
    return None
|
|
|
|
def count_predicates_for_episode(group_id, uuid):
    """Return the number of distinct ``r.name`` values on RELATES_TO edges
    whose ``r.episodes`` list contains ``uuid``.

    The count is taken from the first all-digit line of the query output;
    0 is returned when the output has no such line.
    """
    cypher = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(distinct r.name) AS p"
    raw = query(group_id, cypher)
    # Blank lines are never all-digit, so they need no separate filter.
    tokens = (piece.strip() for piece in raw.split("\n"))
    return next((int(tok) for tok in tokens if tok.isdigit()), 0)
|
|
|
|
def count_total_edges_for_episode(group_id, uuid):
    """Return the number of RELATES_TO edges whose ``r.episodes`` list
    contains ``uuid``.

    The count is taken from the first all-digit line of the query output;
    0 is returned when the output has no such line.
    """
    cypher = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(r) AS n"
    raw = query(group_id, cypher)
    # Blank lines are never all-digit, so they need no separate filter.
    tokens = (piece.strip() for piece in raw.split("\n"))
    return next((int(tok) for tok in tokens if tok.isdigit()), 0)
|
|
|
|
# Graph keys being compared: "A" is the baseline graph, "B" the cascade test graph.
GROUP_A = "aaron"
GROUP_B = "aaron_cascade_test"
# Destination for the per-source and aggregate results.
OUTPUT_FILE = EXPERIMENTS / "cascade_reextract_corrected_comparison.json"

# Load the sampled episodes; each entry must provide "name" and "bucket".
with open(SAMPLE_FILE, encoding="utf-8") as f:
    sample = json.load(f)
selected = sample["selected"]

print("E1 corrected per-source comparison — predicates per episode by edge origin\n")
print(f"{'Source':<60} {'A.edges':>8} {'A.preds':>8} {'B.edges':>8} {'B.preds':>8}")
print("-" * 100)

records = []

for ep in selected:
    name = ep["name"]
    a_uuid = get_episode_uuid(GROUP_A, name)
    b_uuid = get_episode_uuid(GROUP_B, name)

    # An episode with no UUID in a graph contributes zeros for that side.
    a_edges = count_total_edges_for_episode(GROUP_A, a_uuid) if a_uuid else 0
    a_preds = count_predicates_for_episode(GROUP_A, a_uuid) if a_uuid else 0
    b_edges = count_total_edges_for_episode(GROUP_B, b_uuid) if b_uuid else 0
    b_preds = count_predicates_for_episode(GROUP_B, b_uuid) if b_uuid else 0

    # Truncate long names so the table columns stay aligned.
    display = name if len(name) <= 58 else name[:55] + "..."
    print(f"{display:<60} {a_edges:>8} {a_preds:>8} {b_edges:>8} {b_preds:>8}")

    records.append({
        "name": name, "bucket": ep["bucket"],
        "a_edges": a_edges, "a_preds": a_preds,
        "b_edges": b_edges, "b_preds": b_preds,
    })

a_pred_total = sum(r["a_preds"] for r in records)
b_pred_total = sum(r["b_preds"] for r in records)
a_edge_total = sum(r["a_edges"] for r in records)
b_edge_total = sum(r["b_edges"] for r in records)

print("-" * 100)
n = len(selected)
# Guard the means: an empty sample would otherwise raise ZeroDivisionError.
a_edge_mean = a_edge_total / n if n else 0.0
b_edge_mean = b_edge_total / n if n else 0.0
a_pred_mean = a_pred_total / n if n else 0.0
b_pred_mean = b_pred_total / n if n else 0.0

print(f"\nAggregate (n={n}):")
print(f" Edges: A total={a_edge_total} mean={a_edge_mean:.1f} B total={b_edge_total} mean={b_edge_mean:.1f}")
print(f" Predicates: A total={a_pred_total} mean={a_pred_mean:.1f} B total={b_pred_total} mean={b_pred_mean:.1f}")
# Relative deltas are only defined when the baseline (A) total is non-zero.
if a_pred_total > 0:
    print(f" Predicate delta: B vs A = {(b_pred_total-a_pred_total)/a_pred_total*100:+.1f}%")
if a_edge_total > 0:
    print(f" Edge delta: B vs A = {(b_edge_total-a_edge_total)/a_edge_total*100:+.1f}%")

# Per-bucket breakdown; only the four bucket labels below are reported.
print("\nPer-bucket:")
for bucket in ["high", "mid", "low", "document"]:
    bucket_records = [r for r in records if r["bucket"] == bucket]
    if not bucket_records:
        continue
    bn = len(bucket_records)
    a_p = sum(r["a_preds"] for r in bucket_records)
    b_p = sum(r["b_preds"] for r in bucket_records)
    a_e = sum(r["a_edges"] for r in bucket_records)
    b_e = sum(r["b_edges"] for r in bucket_records)
    # A zero baseline is shown as a 0% delta rather than dividing by zero.
    delta = ((b_p-a_p)/a_p*100) if a_p > 0 else 0
    print(f" [{bucket:>8}] n={bn} A.preds={a_p:>3} B.preds={b_p:>3} ({delta:+.0f}%) A.edges={a_e:>3} B.edges={b_e:>3}")

with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    json.dump({"per_source": records,
               "aggregate": {"a_preds": a_pred_total, "b_preds": b_pred_total,
                             "a_edges": a_edge_total, "b_edges": b_edge_total}}, f, indent=2)
print(f"\nSaved to {OUTPUT_FILE}")
|