add experiment scripts and results; watcher.py latest changes
This commit is contained in:
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
E1.4 per-source predicate diversity comparison — fixed version.
|
||||
Looks up episode uuids by name in both production and cascade graphs.
|
||||
"""
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from falkordb import FalkorDB
|
||||
|
||||
# Path of the E1.4 cascade results JSON file that main() loads its source list from.
E14_RESULTS = "/home/aaron/aaronai/experiments/e14_cascade_results.json"
|
||||
# Graph name passed to select_graph() for the production side of the comparison.
PRODUCTION_GROUP = "aaron"
|
||||
# Graph name passed to select_graph() for the cascade side of the comparison.
CASCADE_GROUP = "aaron_cascade_e14"
|
||||
|
||||
def get_predicates_for_episode(graph, episode_uuid):
    """Count the distinct predicate names on edges that cite an episode.

    Queries ``RELATES_TO`` relationships whose ``episodes`` property contains
    *episode_uuid* and asks for ``count(DISTINCT r.name)``.

    Args:
        graph: Object exposing ``query(cypher, params)``; the returned result
            must provide a ``result_set`` sequence of rows.
        episode_uuid: Episode identifier bound to the ``$uuid`` parameter.

    Returns:
        The first column of the first result row, or 0 when the result set
        is empty.
    """
    cypher = """
    MATCH ()-[r:RELATES_TO]->()
    WHERE $uuid IN r.episodes
    RETURN count(DISTINCT r.name) AS predicate_count
    """
    result_rows = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not result_rows:
        return 0
    first_row = result_rows[0]
    return first_row[0]
|
||||
|
||||
def get_edge_count_for_episode(graph, episode_uuid):
    """Count the edges that cite an episode.

    Queries ``RELATES_TO`` relationships whose ``episodes`` property contains
    *episode_uuid* and asks for ``count(r)``.

    Args:
        graph: Object exposing ``query(cypher, params)``; the returned result
            must provide a ``result_set`` sequence of rows.
        episode_uuid: Episode identifier bound to the ``$uuid`` parameter.

    Returns:
        The first column of the first result row, or 0 when the result set
        is empty.
    """
    cypher = """
    MATCH ()-[r:RELATES_TO]->()
    WHERE $uuid IN r.episodes
    RETURN count(r) AS edge_count
    """
    result_rows = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not result_rows:
        return 0
    first_row = result_rows[0]
    return first_row[0]
|
||||
|
||||
def find_episode_uuid(graph, source_name):
    """Look up the uuid of an ``Episodic`` node by its name.

    Args:
        graph: Object exposing ``query(cypher, params)``; the returned result
            must provide a ``result_set`` sequence of rows.
        source_name: Value matched against the node's ``name`` property via
            the ``$name`` parameter.

    Returns:
        The ``uuid`` from the first matching row (the query is limited to one
        row), or ``None`` when nothing matches.
    """
    cypher = """
    MATCH (e:Episodic {name: $name})
    RETURN e.uuid AS uuid
    LIMIT 1
    """
    result_rows = graph.query(cypher, {"name": source_name}).result_set
    if not result_rows:
        return None
    first_row = result_rows[0]
    return first_row[0]
|
||||
|
||||
def _pct_change(before, after):
    """Return the percentage change from *before* to *after*; 0 when *before* is 0."""
    return (after - before) / before * 100 if before else 0


def main():
    """Compare per-source predicate and edge counts between two graphs.

    Steps:
      1. Connect to FalkorDB on localhost:6379 and select the production and
         cascade graphs.
      2. Load the E1.4 results JSON and keep the entries that contain a
         ``submit_result`` key.
      3. For each such source, resolve its episode uuid by name in both
         graphs; sources missing from either graph are reported and skipped.
      4. Print a per-source table, a per-bucket aggregation and an overall
         aggregate, then write the per-source comparison list to JSON.

    Percent changes are reported as 0 when the production-side total is 0,
    both per bucket and in the overall aggregate, so an empty production
    baseline no longer raises ``ZeroDivisionError``.

    Buckets other than high/mid/low/document are reported after the known
    ones (in first-seen order) so the per-bucket sections always add up to
    the aggregate.
    """
    db = FalkorDB(host='localhost', port=6379)
    prod_graph = db.select_graph(PRODUCTION_GROUP)
    cascade_graph = db.select_graph(CASCADE_GROUP)

    # Explicit encoding: JSON is UTF-8 regardless of the machine's locale.
    with open(E14_RESULTS, encoding="utf-8") as f:
        e14 = json.load(f)

    sources = [r for r in e14['results'] if 'submit_result' in r]
    print(f"Analyzing {len(sources)} sources...")
    print()

    comparisons = []
    missing_prod = 0
    missing_cascade = 0
    for src in sources:
        name = src['name']
        bucket = src['bucket']

        prod_uuid = find_episode_uuid(prod_graph, name)
        cascade_uuid = find_episode_uuid(cascade_graph, name)

        # A source is only comparable when it exists in both graphs.
        if not prod_uuid:
            missing_prod += 1
            print(f" WARN: missing in production: {name}")
            continue
        if not cascade_uuid:
            missing_cascade += 1
            print(f" WARN: missing in cascade: {name}")
            continue

        prod_preds = get_predicates_for_episode(prod_graph, prod_uuid)
        cascade_preds = get_predicates_for_episode(cascade_graph, cascade_uuid)
        prod_edges = get_edge_count_for_episode(prod_graph, prod_uuid)
        cascade_edges = get_edge_count_for_episode(cascade_graph, cascade_uuid)

        comparisons.append({
            "name": name,
            "bucket": bucket,
            "prod_preds": prod_preds,
            "cascade_preds": cascade_preds,
            "delta_preds": cascade_preds - prod_preds,
            "prod_edges": prod_edges,
            "cascade_edges": cascade_edges,
            "delta_edges": cascade_edges - prod_edges,
        })

    if missing_prod or missing_cascade:
        print()
        print(f"Missing: {missing_prod} prod, {missing_cascade} cascade")
        print()

    if not comparisons:
        print("No comparable sources found. Aborting.")
        return

    # Per-source detail
    print(f"{'Bucket':<10} {'Source':<58} {'Preds A→B':<14} {'Δ':<6} {'Edges A→B':<14} {'Δ'}")
    print("-" * 115)
    for c in sorted(comparisons, key=lambda x: (x['bucket'], x['name'])):
        # Names longer than the 58-char column are cut to 55 chars plus "..".
        name_short = (c['name'][:55] + '..') if len(c['name']) > 58 else c['name']
        preds_str = f"{c['prod_preds']}→{c['cascade_preds']}"
        edges_str = f"{c['prod_edges']}→{c['cascade_edges']}"
        print(f"{c['bucket']:<10} {name_short:<58} {preds_str:<14} {c['delta_preds']:+d} {edges_str:<14} {c['delta_edges']:+d}")

    # Per-bucket aggregation
    print()
    print("=" * 115)
    print("PER-BUCKET AGGREGATION")
    print("=" * 115)
    by_bucket = defaultdict(list)
    for c in comparisons:
        by_bucket[c['bucket']].append(c)

    # Known buckets keep their fixed report order; any other bucket found in
    # the data follows, so no comparable source is left out of this section.
    known_buckets = ['high', 'mid', 'low', 'document']
    bucket_order = known_buckets + [b for b in by_bucket if b not in known_buckets]

    for bucket in bucket_order:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        sum_pp = sum(c['prod_preds'] for c in items)
        sum_cp = sum(c['cascade_preds'] for c in items)
        sum_pe = sum(c['prod_edges'] for c in items)
        sum_ce = sum(c['cascade_edges'] for c in items)
        positive = sum(1 for c in items if c['delta_preds'] > 0)
        negative = sum(1 for c in items if c['delta_preds'] < 0)
        flat = sum(1 for c in items if c['delta_preds'] == 0)
        pct_pred = _pct_change(sum_pp, sum_cp)
        pct_edge = _pct_change(sum_pe, sum_ce)
        print(f"\n{str(bucket).upper()} (n={n}):")
        print(f" Predicates: {sum_pp} → {sum_cp} ({pct_pred:+.1f}%)")
        print(f" Edges: {sum_pe} → {sum_ce} ({pct_edge:+.1f}%)")
        print(f" Outcomes: {positive} positive, {flat} flat, {negative} negative")

    # Aggregate
    print()
    print("=" * 115)
    print(f"AGGREGATE (n={len(comparisons)})")
    print("=" * 115)
    total_pp = sum(c['prod_preds'] for c in comparisons)
    total_cp = sum(c['cascade_preds'] for c in comparisons)
    total_pe = sum(c['prod_edges'] for c in comparisons)
    total_ce = sum(c['cascade_edges'] for c in comparisons)
    # Same zero-baseline guard as the per-bucket section.
    print(f" Predicates: {total_pp} → {total_cp} ({_pct_change(total_pp, total_cp):+.1f}%)")
    print(f" Edges: {total_pe} → {total_ce} ({_pct_change(total_pe, total_ce):+.1f}%)")

    out_path = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(comparisons, f, indent=2)
    print()
    print(f"Saved to {out_path}")
|
||||
|
||||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user