#!/usr/bin/env python3
"""E1 corrected metric — count distinct predicate names on edges originating from each episode.

For every episode listed under ``"selected"`` in SAMPLE_FILE, the script looks
up the episode's UUID in two graphs (GROUP_A and GROUP_B), counts the
RELATES_TO edges whose ``episodes`` list contains that UUID, and counts the
distinct ``name`` values on those edges. It prints a per-episode table,
aggregate and per-bucket summaries, and writes the numbers to OUTPUT_FILE.
"""
import json
import subprocess
import sys
from pathlib import Path

EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
OUTPUT_FILE = EXPERIMENTS / "cascade_reextract_corrected_comparison.json"

# Graph names compared by the report: "A" columns vs. "B" columns.
GROUP_A = "aaron"
GROUP_B = "aaron_cascade_test"

# Buckets reported in the per-bucket summary, in display order.
BUCKETS = ["high", "mid", "low", "document"]


def query(group_id, cypher):
    """Run a Cypher query through ``redis-cli GRAPH.QUERY`` and return its stdout.

    The command is executed inside the ``falkordb`` container via
    ``docker exec``. A non-zero exit status is reported on stderr but does not
    raise, so callers keep their "no result" fallbacks; the warning makes a
    failed query distinguishable from a genuine empty result.

    Args:
        group_id: Name of the graph to query.
        cypher: The Cypher query text.

    Returns:
        The captured standard output of the command, as text.
    """
    result = subprocess.run(
        ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", group_id, cypher],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(
            f"warning: GRAPH.QUERY on {group_id!r} exited with status "
            f"{result.returncode}: {result.stderr.strip()}",
            file=sys.stderr,
        )
    return result.stdout


def escape_cypher_string(value):
    """Escape *value* for use inside a single-quoted Cypher string literal.

    Backslashes are doubled first so that the backslashes added for quotes are
    not themselves re-escaped, and so that a backslash already present in the
    value cannot swallow the closing quote or form an escape sequence.
    """
    return value.replace("\\", "\\\\").replace("'", "\\'")


def output_lines(output):
    """Return the non-empty lines of *output*, stripped of surrounding whitespace."""
    return [line.strip() for line in output.split("\n") if line.strip()]


def parse_first_uuid(output):
    """Return the first line of *output* shaped like a UUID, or None.

    A line qualifies when it is 36 characters long and contains exactly four
    hyphens; no stricter validation is performed.
    """
    for line in output_lines(output):
        if len(line) == 36 and line.count("-") == 4:
            return line
    return None


def parse_first_int(output):
    """Return the first all-digit line of *output* as an int, or 0 if none."""
    for line in output_lines(output):
        if line.isdigit():
            return int(line)
    return 0


def mean_or_zero(total, count):
    """Return ``total / count``, or 0.0 when *count* is zero (empty sample)."""
    return total / count if count else 0.0


def get_episode_uuid(group_id, episode_name):
    """Look up the UUID for a given episode name in a given group.

    Returns:
        The UUID string, or None when the query output has no UUID-shaped line.
    """
    safe = escape_cypher_string(episode_name)
    cypher = f"MATCH (e:Episodic) WHERE e.name = '{safe}' RETURN e.uuid LIMIT 1"
    return parse_first_uuid(query(group_id, cypher))


def count_predicates_for_episode(group_id, uuid):
    """Count distinct predicate names on edges where this episode UUID appears in r.episodes.

    Returns 0 when the query output contains no numeric line.
    """
    cypher = (
        f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes "
        "RETURN count(distinct r.name) AS p"
    )
    return parse_first_int(query(group_id, cypher))


def count_total_edges_for_episode(group_id, uuid):
    """Count total edges originating from this episode.

    Returns 0 when the query output contains no numeric line.
    """
    cypher = (
        f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes "
        "RETURN count(r) AS n"
    )
    return parse_first_int(query(group_id, cypher))


def main():
    """Build the per-episode comparison, print the report, and save it as JSON."""
    with open(SAMPLE_FILE, encoding="utf-8") as f:
        sample = json.load(f)

    selected = sample["selected"]

    print(f"E1 corrected per-source comparison — predicates per episode by edge origin\n")
    print(f"{'Source':<60} {'A.edges':>8} {'A.preds':>8} {'B.edges':>8} {'B.preds':>8}")
    print("-" * 100)

    a_pred_total = 0
    b_pred_total = 0
    a_edge_total = 0
    b_edge_total = 0

    records = []
    for ep in selected:
        name = ep["name"]
        a_uuid = get_episode_uuid(GROUP_A, name)
        b_uuid = get_episode_uuid(GROUP_B, name)

        # An episode that cannot be found in a graph contributes zero counts.
        a_edges = count_total_edges_for_episode(GROUP_A, a_uuid) if a_uuid else 0
        a_preds = count_predicates_for_episode(GROUP_A, a_uuid) if a_uuid else 0
        b_edges = count_total_edges_for_episode(GROUP_B, b_uuid) if b_uuid else 0
        b_preds = count_predicates_for_episode(GROUP_B, b_uuid) if b_uuid else 0

        # Truncate long names so the table columns stay aligned.
        display = name if len(name) <= 58 else name[:55] + "..."
        print(f"{display:<60} {a_edges:>8} {a_preds:>8} {b_edges:>8} {b_preds:>8}")

        records.append({
            "name": name,
            "bucket": ep["bucket"],
            "a_edges": a_edges,
            "a_preds": a_preds,
            "b_edges": b_edges,
            "b_preds": b_preds,
        })

        a_pred_total += a_preds
        b_pred_total += b_preds
        a_edge_total += a_edges
        b_edge_total += b_edges

    print("-" * 100)
    n = len(selected)
    print(f"\nAggregate (n={n}):")
    # mean_or_zero keeps an empty sample from raising ZeroDivisionError.
    print(
        f" Edges: A total={a_edge_total} mean={mean_or_zero(a_edge_total, n):.1f}"
        f" B total={b_edge_total} mean={mean_or_zero(b_edge_total, n):.1f}"
    )
    print(
        f" Predicates: A total={a_pred_total} mean={mean_or_zero(a_pred_total, n):.1f}"
        f" B total={b_pred_total} mean={mean_or_zero(b_pred_total, n):.1f}"
    )
    # Relative deltas are only defined when the baseline (A) is non-zero.
    if a_pred_total > 0:
        print(f" Predicate delta: B vs A = {(b_pred_total-a_pred_total)/a_pred_total*100:+.1f}%")
    if a_edge_total > 0:
        print(f" Edge delta: B vs A = {(b_edge_total-a_edge_total)/a_edge_total*100:+.1f}%")

    # Per-bucket
    print(f"\nPer-bucket:")
    for bucket in BUCKETS:
        bucket_records = [r for r in records if r["bucket"] == bucket]
        if not bucket_records:
            continue
        bn = len(bucket_records)
        a_p = sum(r["a_preds"] for r in bucket_records)
        b_p = sum(r["b_preds"] for r in bucket_records)
        a_e = sum(r["a_edges"] for r in bucket_records)
        b_e = sum(r["b_edges"] for r in bucket_records)
        # A bucket with no baseline predicates is reported as a 0% delta.
        delta = ((b_p-a_p)/a_p*100) if a_p > 0 else 0
        print(f" [{bucket:>8}] n={bn} A.preds={a_p:>3} B.preds={b_p:>3} ({delta:+.0f}%) A.edges={a_e:>3} B.edges={b_e:>3}")

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(
            {
                "per_source": records,
                "aggregate": {
                    "a_preds": a_pred_total,
                    "b_preds": b_pred_total,
                    "a_edges": a_edge_total,
                    "b_edges": b_edge_total,
                },
            },
            f,
            indent=2,
        )

    print(f"\nSaved to {EXPERIMENTS / 'cascade_reextract_corrected_comparison.json'}")


if __name__ == "__main__":
    main()