Files
aaronAI/scripts/experiments/e14_per_source_predicates.py
T
aaron 3f7fba7e0e scripts/: separate production from experimental and deprecated
Moves 28 experiment scripts to scripts/experiments/ (E1, E1.4, E1.6, E2,
base_class, cascade, cost_test, briefing, consistency, token series).
Moves 2 dissolved-layer scripts to scripts/deprecated/ (consolidator_v0_1.py,
tier1_migration.py — under the bespoke decision, both target retired
substrate work).
Removes 19 .bak* files from disk (gitignored, never tracked; git history
is the durable record of every prior version).

The 11 production scripts remain in scripts/. All systemd ExecStart paths,
api.py subprocess calls, and cron jobs continue to resolve correctly —
verified by grep against /etc/systemd/system/aaronai-*.service, scripts/
references in api.py, and the user crontab.

Track 1 inventory cross-cutting finding: scripts/ mixed 11 production
files with 32 experimental scripts and ~20 .bak files. After this commit
a clean-room reader can identify the live workers from a directory listing
alone.

Found by Track 1 inventory 2026-05-02. See
~/aaronai/docs/scripts-reorg-plan-2026-05-02.md for full reasoning.

After commit, run:
1. git log --oneline -3 — show the new commit on top
2. git status — confirm clean working tree (modulo the docs/ untracked files which are intentional)
2026-05-02 23:28:24 +00:00

156 lines
5.5 KiB
Python

"""
E1.4 per-source predicate diversity comparison — fixed version.
Looks up episode uuids by name in both production and cascade graphs.
"""
import json
from collections import defaultdict
from falkordb import FalkorDB
# Path of the E1.4 results JSON loaded by main(); its 'results' list supplies
# the sources that are compared.
E14_RESULTS = "/home/aaron/aaronai/experiments/e14_cascade_results.json"
# Graph name main() selects as the production side of the comparison.
PRODUCTION_GROUP = "aaron"
# Graph name main() selects as the cascade side of the comparison.
CASCADE_GROUP = "aaron_cascade_e14"
def get_predicates_for_episode(graph, episode_uuid):
    """Count distinct RELATES_TO edge names recorded for one episode.

    An edge is counted when ``episode_uuid`` appears in its ``episodes``
    property. The value in the first column of the first result row is
    returned; when the query yields no rows the result is 0.
    """
    cypher = (
        "\n"
        "MATCH ()-[r:RELATES_TO]->()\n"
        "WHERE $uuid IN r.episodes\n"
        "RETURN count(DISTINCT r.name) AS predicate_count\n"
    )
    result_rows = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not result_rows:
        return 0
    return result_rows[0][0]
def get_edge_count_for_episode(graph, episode_uuid):
    """Count the RELATES_TO edges recorded for one episode.

    An edge is counted when ``episode_uuid`` appears in its ``episodes``
    property. The value in the first column of the first result row is
    returned; when the query yields no rows the result is 0.
    """
    cypher = (
        "\n"
        "MATCH ()-[r:RELATES_TO]->()\n"
        "WHERE $uuid IN r.episodes\n"
        "RETURN count(r) AS edge_count\n"
    )
    result_rows = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not result_rows:
        return 0
    return result_rows[0][0]
def find_episode_uuid(graph, source_name):
    """Look up the uuid of an Episodic node by its name.

    The query is limited to a single row. Returns that row's uuid, or
    None when no Episodic node carries ``source_name``.
    """
    cypher = (
        "\n"
        "MATCH (e:Episodic {name: $name})\n"
        "RETURN e.uuid AS uuid\n"
        "LIMIT 1\n"
    )
    result_rows = graph.query(cypher, {"name": source_name}).result_set
    if not result_rows:
        return None
    return result_rows[0][0]
def _pct_change(before, after):
    """Return the percent change from *before* to *after*, or 0 when *before* is 0."""
    return ((after - before) / before * 100) if before else 0


def _pair(before, after):
    """Render a before/after pair as 'before→after', matching the 'A→B' headers."""
    return f"{before}→{after}"


def main():
    """Compare per-source predicate and edge counts between two graphs.

    Connects to FalkorDB on localhost:6379, selects the PRODUCTION_GROUP and
    CASCADE_GROUP graphs, and loads E14_RESULTS. Every entry of the file's
    'results' list that has a 'submit_result' key is looked up by name in
    both graphs; entries missing from either graph are reported with a
    warning and skipped. For the remaining sources the distinct-predicate
    and edge counts are collected, then printed as a per-source table, a
    per-bucket aggregation, and an overall aggregate. The per-source
    comparisons are finally written as JSON to
    /home/aaron/aaronai/experiments/e14_per_source_comparison.json.

    Returns None. If no source is present in both graphs, prints a notice
    and returns before any table or file is produced.
    """
    db = FalkorDB(host='localhost', port=6379)
    prod_graph = db.select_graph(PRODUCTION_GROUP)
    cascade_graph = db.select_graph(CASCADE_GROUP)

    with open(E14_RESULTS, encoding="utf-8") as f:
        e14 = json.load(f)

    sources = [r for r in e14['results'] if 'submit_result' in r]
    print(f"Analyzing {len(sources)} sources...")
    print()

    comparisons = []
    missing_prod = 0
    missing_cascade = 0
    for src in sources:
        name = src['name']
        bucket = src['bucket']
        prod_uuid = find_episode_uuid(prod_graph, name)
        cascade_uuid = find_episode_uuid(cascade_graph, name)
        # A source can only be compared when it exists in both graphs.
        if not prod_uuid:
            missing_prod += 1
            print(f" WARN: missing in production: {name}")
            continue
        if not cascade_uuid:
            missing_cascade += 1
            print(f" WARN: missing in cascade: {name}")
            continue
        prod_preds = get_predicates_for_episode(prod_graph, prod_uuid)
        cascade_preds = get_predicates_for_episode(cascade_graph, cascade_uuid)
        prod_edges = get_edge_count_for_episode(prod_graph, prod_uuid)
        cascade_edges = get_edge_count_for_episode(cascade_graph, cascade_uuid)
        comparisons.append({
            "name": name,
            "bucket": bucket,
            "prod_preds": prod_preds,
            "cascade_preds": cascade_preds,
            "delta_preds": cascade_preds - prod_preds,
            "prod_edges": prod_edges,
            "cascade_edges": cascade_edges,
            "delta_edges": cascade_edges - prod_edges,
        })

    if missing_prod or missing_cascade:
        print()
        print(f"Missing: {missing_prod} prod, {missing_cascade} cascade")
        print()

    if not comparisons:
        print("No comparable sources found. Aborting.")
        return

    # Per-source detail
    print(f"{'Bucket':<10} {'Source':<58} {'Preds A→B':<14} {'Δ':<6} {'Edges A→B':<14} {'Δ'}")
    print("-" * 115)
    for c in sorted(comparisons, key=lambda x: (x['bucket'], x['name'])):
        name_short = (c['name'][:55] + '..') if len(c['name']) > 58 else c['name']
        preds_str = _pair(c['prod_preds'], c['cascade_preds'])
        edges_str = _pair(c['prod_edges'], c['cascade_edges'])
        # The first delta is padded to width 6 so the edges column lines up
        # with the header row, which reserves 6 characters for it.
        print(f"{c['bucket']:<10} {name_short:<58} {preds_str:<14} {c['delta_preds']:<+6d} {edges_str:<14} {c['delta_edges']:+d}")

    # Per-bucket aggregation
    print()
    print("=" * 115)
    print("PER-BUCKET AGGREGATION")
    print("=" * 115)
    by_bucket = defaultdict(list)
    for c in comparisons:
        by_bucket[c['bucket']].append(c)

    # Known buckets keep their fixed order; any other bucket present in the
    # data is appended so its sources are not dropped from this section while
    # still being counted in the aggregate below.
    known_buckets = ['high', 'mid', 'low', 'document']
    extra_buckets = sorted(b for b in by_bucket if b not in known_buckets)
    for bucket in known_buckets + extra_buckets:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        sum_pp = sum(c['prod_preds'] for c in items)
        sum_cp = sum(c['cascade_preds'] for c in items)
        sum_pe = sum(c['prod_edges'] for c in items)
        sum_ce = sum(c['cascade_edges'] for c in items)
        positive = sum(1 for c in items if c['delta_preds'] > 0)
        negative = sum(1 for c in items if c['delta_preds'] < 0)
        flat = sum(1 for c in items if c['delta_preds'] == 0)
        pct_pred = _pct_change(sum_pp, sum_cp)
        pct_edge = _pct_change(sum_pe, sum_ce)
        print(f"\n{bucket.upper()} (n={n}):")
        print(f" Predicates: {_pair(sum_pp, sum_cp)} ({pct_pred:+.1f}%)")
        print(f" Edges: {_pair(sum_pe, sum_ce)} ({pct_edge:+.1f}%)")
        print(f" Outcomes: {positive} positive, {flat} flat, {negative} negative")

    # Aggregate
    print()
    print("=" * 115)
    print(f"AGGREGATE (n={len(comparisons)})")
    print("=" * 115)
    total_pp = sum(c['prod_preds'] for c in comparisons)
    total_cp = sum(c['cascade_preds'] for c in comparisons)
    total_pe = sum(c['prod_edges'] for c in comparisons)
    total_ce = sum(c['cascade_edges'] for c in comparisons)
    # _pct_change guards against a zero production total, which would
    # otherwise raise ZeroDivisionError here.
    print(f" Predicates: {_pair(total_pp, total_cp)} ({_pct_change(total_pp, total_cp):+.1f}%)")
    print(f" Edges: {_pair(total_pe, total_ce)} ({_pct_change(total_pe, total_ce):+.1f}%)")

    out_path = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(comparisons, f, indent=2)
    print()
    print(f"Saved to {out_path}")
# Run the comparison only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()