Files
aaron 3f7fba7e0e scripts/: separate production from experimental and deprecated
Moves 28 experiment scripts to scripts/experiments/ (E1, E1.4, E1.6, E2,
base_class, cascade, cost_test, briefing, consistency, token series).
Moves 2 dissolved-layer scripts to scripts/deprecated/ (consolidator_v0_1.py,
tier1_migration.py — under the bespoke decision both target retired
substrate work).
Removes 19 .bak* files from disk (gitignored, never tracked; git history
is the durable record of every prior version).

The 11 production scripts remain in scripts/. All systemd ExecStart paths,
api.py subprocess calls, and cron jobs continue to resolve correctly —
verified by grep against /etc/systemd/system/aaronai-*.service, scripts/
references in api.py, and the user crontab.

Track 1 inventory cross-cutting finding: scripts/ mixed 11 production
files with 32 experimental scripts and ~20 .bak files. After this commit
a clean-room reader can identify the live workers from a directory listing
alone.

Found by Track 1 inventory 2026-05-02. See
~/aaronai/docs/scripts-reorg-plan-2026-05-02.md for full reasoning.

After commit, run:
1. git log --oneline -3 — show the new commit on top
2. git status — confirm clean working tree (modulo the docs/ untracked files which are intentional)
2026-05-02 23:28:24 +00:00

247 lines
9.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
E1.6 analysis — correlate domain-purity ratings with cascade outcomes.
Applies pre-registered decision rules from E1.6 protocol.
"""
import json
from collections import defaultdict
# Input: JSON object whose 'ratings' list holds one purity rating per source
# (main() reads 'name', 'binary', 'score' and optional 'note' from each entry).
RATINGS_PATH = "/home/aaron/aaronai/experiments/e16_purity_ratings.json"
# Input: JSON list of per-source cascade comparison records (main() reads
# 'name', 'bucket', 'delta_preds', 'delta_edges', 'prod_preds', 'cascade_preds').
COMPARISON_PATH = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
def spearman(xs, ys):
    """Compute the Spearman rank correlation of two equal-length sequences.

    Each sequence is converted to 1-based ranks (tied values share the
    average of the rank positions they occupy) and the Pearson correlation
    of the two rank vectors is returned.

    Args:
        xs: Indexable sequence of mutually comparable values.
        ys: Indexable sequence of mutually comparable values; must have the
            same length as ``xs``.

    Returns:
        The correlation coefficient as a float, or ``None`` when it is
        undefined: fewer than two observations, or every value on one side
        is tied (zero rank variance).

    Raises:
        ValueError: If ``xs`` and ``ys`` differ in length. Previously a
            longer ``ys`` was silently truncated and a shorter one raised
            an opaque ``IndexError``.
    """
    n = len(xs)
    if n != len(ys):
        raise ValueError(
            f"spearman() requires equal-length inputs, got {n} and {len(ys)}"
        )
    if n < 2:
        return None

    def rank(values):
        """Return average (fractional) 1-based ranks for ``values``."""
        order = sorted(range(len(values)), key=lambda i: values[i])
        ranks = [0] * len(values)
        start = 0
        while start < len(order):
            # Extend [start, end] over the run of values tied with order[start].
            end = start
            while (end + 1 < len(order)
                   and values[order[end + 1]] == values[order[start]]):
                end += 1
            # 0-based positions start..end map to 1-based ranks; ties share the mean.
            tied_rank = (start + end) / 2 + 1
            for pos in range(start, end + 1):
                ranks[order[pos]] = tied_rank
            start = end + 1
        return ranks

    rx = rank(xs)
    ry = rank(ys)
    mean_rx = sum(rx) / n
    mean_ry = sum(ry) / n

    # Pearson correlation computed on the rank vectors.
    num = sum((a - mean_rx) * (b - mean_ry) for a, b in zip(rx, ry))
    den_x = sum((a - mean_rx) ** 2 for a in rx) ** 0.5
    den_y = sum((b - mean_ry) ** 2 for b in ry) ** 0.5
    if den_x == 0 or den_y == 0:
        # One side is constant, so the correlation is undefined.
        return None
    return num / (den_x * den_y)
def main():
    """Join purity ratings with cascade outcomes and print the E1.6 analysis.

    Reads the ratings JSON (``RATINGS_PATH``) and the per-source comparison
    JSON (``COMPARISON_PATH``), joins them on ``name``, and prints:

    * a per-source table,
    * the primary test (binary purity vs. sign of the predicate delta, with
      the pre-registered 20pp gap rule),
    * the secondary test (Spearman correlation of purity score vs. deltas),
    * the tertiary within-bucket breakdown,
    * mean deltas per binary group.

    Finally writes the joined rows to ``e16_joined_analysis.json``.

    Returns:
        None. All results go to stdout and the output JSON file.
    """
    with open(RATINGS_PATH, encoding="utf-8") as f:
        ratings_data = json.load(f)
    with open(COMPARISON_PATH, encoding="utf-8") as f:
        comparisons = json.load(f)

    ratings_by_name = {r['name']: r for r in ratings_data['ratings']}
    comp_by_name = {c['name']: c for c in comparisons}

    # Join ratings with cascade outcomes; sources missing from the comparison
    # file are dropped.
    joined = []
    for name, rating in ratings_by_name.items():
        if name in comp_by_name:
            comp = comp_by_name[name]
            joined.append({
                'name': name,
                'binary': rating['binary'],
                'score': rating['score'],
                'note': rating.get('note'),
                'bucket': comp['bucket'],
                'delta_preds': comp['delta_preds'],
                'delta_edges': comp['delta_edges'],
                'prod_preds': comp['prod_preds'],
                'cascade_preds': comp['cascade_preds'],
            })

    print("=" * 100)
    print(f"E1.6 ANALYSIS — Domain Purity vs Cascade Outcome (n={len(joined)})")
    print("=" * 100)

    # Per-source detail with rating
    print()
    print(f"{'Bucket':<10} {'Source':<48} {'Domain':<8} {'Score':<6} {'Δpreds':<8} {'Δedges':<8}")
    print("-" * 100)
    for j in sorted(joined, key=lambda x: (x['binary'], -x['score'], x['bucket'], x['name'])):
        # Keep long source names inside the 48-character column.
        name_short = (j['name'][:45] + '..') if len(j['name']) > 48 else j['name']
        print(f"{j['bucket']:<10} {name_short:<48} {j['binary']:<8} {j['score']:<6} {j['delta_preds']:+d} {j['delta_edges']:+d}")

    # PRIMARY TEST: binary purity vs cascade outcome distribution
    print()
    print("=" * 100)
    print("PRIMARY TEST: Binary purity vs cascade outcome distribution")
    print("=" * 100)

    def categorize_outcome(delta):
        """Classify a predicate delta by its sign."""
        if delta > 0:
            return 'positive'
        elif delta < 0:
            return 'negative'
        else:
            return 'flat'

    by_binary = defaultdict(lambda: {'positive': 0, 'flat': 0, 'negative': 0, 'total': 0})
    for j in joined:
        outcome = categorize_outcome(j['delta_preds'])
        by_binary[j['binary']][outcome] += 1
        by_binary[j['binary']]['total'] += 1

    print()
    print(f"{'Group':<15} {'n':<5} {'Positive':<12} {'Flat':<10} {'Negative':<12}")
    print("-" * 60)
    for binary in ['single', 'multi']:
        d = by_binary[binary]
        n = d['total']
        if n == 0:
            continue
        pos_pct = d['positive'] / n * 100
        flat_pct = d['flat'] / n * 100
        neg_pct = d['negative'] / n * 100
        print(f"{binary+'-domain':<15} {n:<5} {d['positive']} ({pos_pct:.0f}%) {d['flat']} ({flat_pct:.0f}%) {d['negative']} ({neg_pct:.0f}%)")

    # Compute the gap (only meaningful when both groups are non-empty)
    if by_binary['single']['total'] > 0 and by_binary['multi']['total'] > 0:
        single_pos_rate = by_binary['single']['positive'] / by_binary['single']['total'] * 100
        multi_pos_rate = by_binary['multi']['positive'] / by_binary['multi']['total'] * 100
        gap = single_pos_rate - multi_pos_rate
        print()
        print(f"Cascade-positive rate gap (single - multi): {gap:+.1f} percentage points")
        print()

        # Apply pre-registered decision rule
        if gap >= 20:
            verdict = "NARROWNESS HYPOTHESIS SUPPORTED"
            detail = f"Single-domain content is {gap:.0f}pp more likely to gain from cascade than multi-domain."
        elif gap <= -20:
            verdict = "REVERSE OF HYPOTHESIS"
            detail = "Multi-domain content unexpectedly benefits more (counter to prediction)."
        elif abs(gap) < 10:
            verdict = "HYPOTHESIS NOT SUPPORTED"
            detail = "Domain purity does not appear to predict cascade outcome."
        else:
            verdict = "INCONCLUSIVE"
            detail = f"Gap of {gap:+.0f}pp is suggestive but below the pre-registered 20pp threshold."
        print(f" Pre-registered decision rule: {verdict}")
        print(f" {detail}")

    # SECONDARY TEST: Spearman correlation between purity score and predicate delta
    print()
    print("=" * 100)
    print("SECONDARY TEST: Spearman rank correlation (purity score vs predicate delta)")
    print("=" * 100)
    scores = [j['score'] for j in joined]
    deltas_pred = [j['delta_preds'] for j in joined]
    deltas_edge = [j['delta_edges'] for j in joined]
    rho_pred = spearman(scores, deltas_pred)
    rho_edge = spearman(scores, deltas_edge)

    def fmt_rho(rho):
        """Format a correlation; spearman() returns None when it is undefined."""
        return f"{rho:.3f}" if rho is not None else "n/a"

    print()
    # Formatting None with ':.3f' raises TypeError, so go through fmt_rho.
    print(f" Spearman ρ (purity score vs Δpredicates): {fmt_rho(rho_pred)}")
    print(f" Spearman ρ (purity score vs Δedges): {fmt_rho(rho_edge)}")
    print()
    if rho_pred is not None:
        if rho_pred >= 0.4:
            v = "STRONG POSITIVE — narrowness hypothesis supported with monotonic relationship"
        elif rho_pred >= 0.2:
            v = "WEAK POSITIVE — consistent with hypothesis but not strong evidence"
        elif rho_pred <= -0.2:
            v = "NEGATIVE — refutes hypothesis"
        else:
            v = "NO CORRELATION — hypothesis not supported"
        print(f" Predicate delta verdict: {v}")
        print()

    # TERTIARY TEST: within-bucket correlation
    print()
    print("=" * 100)
    print("TERTIARY TEST: Within-bucket correlation")
    print("=" * 100)
    by_bucket = defaultdict(list)
    for j in joined:
        by_bucket[j['bucket']].append(j)

    print()
    print(f"{'Bucket':<12} {'n':<5} {'Single':<10} {'Multi':<10} {'ρ (score vs Δpred)':<22}")
    print("-" * 75)
    for bucket in ['high', 'mid', 'low', 'document']:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        n_single = sum(1 for j in items if j['binary'] == 'single')
        n_multi = sum(1 for j in items if j['binary'] == 'multi')
        # Fewer than three rows is reported as "too few" rather than correlated.
        if n >= 3:
            scores_b = [j['score'] for j in items]
            deltas_b = [j['delta_preds'] for j in items]
            rho_b = spearman(scores_b, deltas_b)
            rho_str = f"{rho_b:+.3f}" if rho_b is not None else "n/a (no variance)"
        else:
            rho_str = "n/a (too few)"
        print(f"{bucket:<12} {n:<5} {n_single:<10} {n_multi:<10} {rho_str}")

    # Interaction with bucket: do single/multi outcomes differ within bucket?
    def rate_str(group):
        """Render 'pos/total positive (pct%)' for a group, or '' if empty."""
        if not group:
            return ""
        pos = sum(1 for j in group if j['delta_preds'] > 0)
        return f"{pos}/{len(group)} positive ({pos/len(group)*100:.0f}%)"

    print()
    print("Per-bucket cascade-positive rate by binary purity:")
    print()
    print(f"{'Bucket':<12} {'Single':<25} {'Multi':<25}")
    print("-" * 65)
    for bucket in ['high', 'mid', 'low', 'document']:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        single_items = [j for j in items if j['binary'] == 'single']
        multi_items = [j for j in items if j['binary'] == 'multi']
        print(f"{bucket:<12} {rate_str(single_items):<25} {rate_str(multi_items):<25}")

    # MEAN DELTA by binary group
    print()
    print("=" * 100)
    print("MEAN PREDICATE DELTA BY GROUP")
    print("=" * 100)
    print()
    for binary in ['single', 'multi']:
        items = [j for j in joined if j['binary'] == binary]
        if not items:
            continue
        n = len(items)
        mean_dp = sum(j['delta_preds'] for j in items) / n
        mean_de = sum(j['delta_edges'] for j in items) / n
        sum_pp = sum(j['prod_preds'] for j in items)
        sum_cp = sum(j['cascade_preds'] for j in items)
        # Guard the division: report 0% when the production total is zero.
        pct_change = (sum_cp - sum_pp) / sum_pp * 100 if sum_pp else 0
        print(f"{binary}-domain (n={n}):")
        print(f" Mean Δpredicates per source: {mean_dp:+.2f}")
        print(f" Mean Δedges per source: {mean_de:+.2f}")
        # Separate the two totals; printed back-to-back they read as one number.
        print(f" Aggregate predicate change: {sum_pp} → {sum_cp} ({pct_change:+.1f}%)")
        print()

    # Save joined data for the experiments log writeup
    out_path = "/home/aaron/aaronai/experiments/e16_joined_analysis.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(joined, f, indent=2)
    print(f"Joined data saved to {out_path}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()