scripts/: separate production from experimental and deprecated
Moves 28 experiment scripts to scripts/experiments/ (E1, E1.4, E1.6, E2, base_class, cascade, cost_test, briefing, consistency, token series). Moves 2 dissolved-layer scripts to scripts/deprecated/ (consolidator_v0_1.py, tier1_migration.py — under the bespoke decision both target retired substrate work). Removes 19 .bak* files from disk (gitignored, never tracked; git history is the durable record of every prior version). The 11 production scripts remain in scripts/. All systemd ExecStart paths, api.py subprocess calls, and cron jobs continue to resolve correctly — verified by grep against /etc/systemd/system/aaronai-*.service, scripts/ references in api.py, and the user crontab. Track 1 inventory cross-cutting finding: scripts/ mixed 11 production files with 32 experimental scripts and ~20 .bak files. After this commit a clean-room reader can identify the live workers from a directory listing alone. Found by Track 1 inventory 2026-05-02. See ~/aaronai/docs/scripts-reorg-plan-2026-05-02.md for full reasoning. After commit, run: 1. git log --oneline -3 — show the new commit on top 2. git status — confirm clean working tree (modulo the docs/ untracked files which are intentional)
This commit is contained in:
@@ -0,0 +1,246 @@
|
||||
"""
|
||||
E1.6 analysis — correlate domain-purity ratings with cascade outcomes.
|
||||
Applies pre-registered decision rules from E1.6 protocol.
|
||||
"""
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
# Input read by main(): a JSON object whose 'ratings' key holds a list of
# per-source entries with 'name', 'binary', 'score' and an optional 'note'.
RATINGS_PATH = "/home/aaron/aaronai/experiments/e16_purity_ratings.json"
|
||||
# Input read by main(): a JSON list of per-source records with 'name', 'bucket',
# 'delta_preds', 'delta_edges', 'prod_preds' and 'cascade_preds'.
COMPARISON_PATH = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
|
||||
|
||||
|
||||
def spearman(xs, ys):
    """Compute the Spearman rank correlation between two paired samples.

    Each sample is converted to 1-based ranks, with tied values sharing the
    average of the positions they occupy, and the Pearson correlation of the
    two rank vectors is returned.

    Args:
        xs: Sequence of mutually comparable values.
        ys: Sequence of mutually comparable values, paired with ``xs`` by index.

    Returns:
        The correlation as a float, or ``None`` when it is undefined: fewer
        than two pairs, or either sample has zero rank variance.

    Raises:
        ValueError: If ``xs`` and ``ys`` have different lengths.
    """
    n = len(xs)
    # Unequal lengths cannot be paired; refusing here avoids silently mixing a
    # mean taken over all of ys with sums taken over only the first n ranks.
    if len(ys) != n:
        raise ValueError(
            f"spearman() requires equal-length inputs, got {n} and {len(ys)}"
        )
    if n < 2:
        return None

    def rank(values):
        """Return average ranks (1-based) for ``values``, in input order."""
        order = sorted(range(len(values)), key=lambda idx: values[idx])
        ranks = [0] * len(values)
        start = 0
        while start < len(order):
            # Extend [start, end] over the run of values tied with order[start].
            end = start
            while (end + 1 < len(order)
                   and values[order[end + 1]] == values[order[start]]):
                end += 1
            # Sorted positions start..end are 0-based, hence the +1.
            shared_rank = (start + end) / 2 + 1
            for pos in range(start, end + 1):
                ranks[order[pos]] = shared_rank
            start = end + 1
        return ranks

    rx = rank(xs)
    ry = rank(ys)
    mean_rx = sum(rx) / n
    mean_ry = sum(ry) / n
    dev_x = [r - mean_rx for r in rx]
    dev_y = [r - mean_ry for r in ry]

    num = sum(a * b for a, b in zip(dev_x, dev_y))
    den_x = sum(a * a for a in dev_x) ** 0.5
    den_y = sum(b * b for b in dev_y) ** 0.5
    # A constant sample has no rank variance, so the correlation is undefined.
    if den_x == 0 or den_y == 0:
        return None
    return num / (den_x * den_y)
|
||||
|
||||
|
||||
def _print_banner(title):
    """Print ``title`` between two 100-character ``=`` rules."""
    rule = "=" * 100
    print(rule)
    print(title)
    print(rule)


def _format_rho(rho):
    """Render a correlation to three decimals, or ``n/a`` when it is ``None``."""
    return "n/a" if rho is None else f"{rho:.3f}"


def _load_joined():
    """Read both input files and join ratings to comparison rows by ``name``.

    Ratings that have no comparison row with the same name are dropped.

    Returns:
        A list of dicts, one per matched source, in ratings order.
    """
    with open(RATINGS_PATH, encoding="utf-8") as f:
        ratings_data = json.load(f)
    with open(COMPARISON_PATH, encoding="utf-8") as f:
        comparisons = json.load(f)

    ratings_by_name = {r['name']: r for r in ratings_data['ratings']}
    comp_by_name = {c['name']: c for c in comparisons}

    joined = []
    for name, rating in ratings_by_name.items():
        if name not in comp_by_name:
            continue
        comp = comp_by_name[name]
        joined.append({
            'name': name,
            'binary': rating['binary'],
            'score': rating['score'],
            'note': rating.get('note'),
            'bucket': comp['bucket'],
            'delta_preds': comp['delta_preds'],
            'delta_edges': comp['delta_edges'],
            'prod_preds': comp['prod_preds'],
            'cascade_preds': comp['cascade_preds'],
        })
    return joined


def _print_source_table(joined):
    """Print one row per source, ordered by domain, descending score, bucket, name."""
    print()
    print(f"{'Bucket':<10} {'Source':<48} {'Domain':<8} {'Score':<6} {'Δpreds':<8} {'Δedges':<8}")
    print("-" * 100)
    ordered = sorted(
        joined,
        key=lambda x: (x['binary'], -x['score'], x['bucket'], x['name']),
    )
    for j in ordered:
        # Names longer than the 48-character column are cut and marked with '..'.
        name_short = (j['name'][:45] + '..') if len(j['name']) > 48 else j['name']
        print(f"{j['bucket']:<10} {name_short:<48} {j['binary']:<8} {j['score']:<6} {j['delta_preds']:+d} {j['delta_edges']:+d}")


def _categorize_outcome(delta):
    """Classify a predicate delta as 'positive', 'negative' or 'flat' by sign."""
    if delta > 0:
        return 'positive'
    if delta < 0:
        return 'negative'
    return 'flat'


def _gap_verdict(gap):
    """Map the single-minus-multi positive-rate gap (in points) to a verdict.

    Returns:
        A ``(verdict, detail)`` pair of strings.
    """
    if gap >= 20:
        return (
            "NARROWNESS HYPOTHESIS SUPPORTED",
            f"Single-domain content is {gap:.0f}pp more likely to gain from cascade than multi-domain.",
        )
    if gap <= -20:
        return (
            "REVERSE OF HYPOTHESIS",
            "Multi-domain content unexpectedly benefits more (counter to prediction).",
        )
    if abs(gap) < 10:
        return (
            "HYPOTHESIS NOT SUPPORTED",
            "Domain purity does not appear to predict cascade outcome.",
        )
    return (
        "INCONCLUSIVE",
        f"Gap of {gap:+.0f}pp is suggestive but below the pre-registered 20pp threshold.",
    )


def _print_primary_test(joined):
    """Print the outcome distribution per purity group and the decision-rule verdict."""
    print()
    _print_banner("PRIMARY TEST: Binary purity vs cascade outcome distribution")

    by_binary = defaultdict(lambda: {'positive': 0, 'flat': 0, 'negative': 0, 'total': 0})
    for j in joined:
        counts = by_binary[j['binary']]
        counts[_categorize_outcome(j['delta_preds'])] += 1
        counts['total'] += 1

    print()
    print(f"{'Group':<15} {'n':<5} {'Positive':<12} {'Flat':<10} {'Negative':<12}")
    print("-" * 60)
    for binary in ['single', 'multi']:
        d = by_binary[binary]
        n = d['total']
        if n == 0:
            continue
        pos_pct = d['positive'] / n * 100
        flat_pct = d['flat'] / n * 100
        neg_pct = d['negative'] / n * 100
        print(f"{binary+'-domain':<15} {n:<5} {d['positive']} ({pos_pct:.0f}%) {d['flat']} ({flat_pct:.0f}%) {d['negative']} ({neg_pct:.0f}%)")

    # The gap is only defined when both groups have at least one source.
    single = by_binary['single']
    multi = by_binary['multi']
    if single['total'] > 0 and multi['total'] > 0:
        single_pos_rate = single['positive'] / single['total'] * 100
        multi_pos_rate = multi['positive'] / multi['total'] * 100
        gap = single_pos_rate - multi_pos_rate
        print()
        print(f"Cascade-positive rate gap (single - multi): {gap:+.1f} percentage points")
        print()
        verdict, detail = _gap_verdict(gap)
        print(f" Pre-registered decision rule: {verdict}")
        print(f" {detail}")


def _rho_verdict(rho):
    """Map a (non-None) predicate-delta correlation to its verdict string."""
    if rho >= 0.4:
        return "STRONG POSITIVE — narrowness hypothesis supported with monotonic relationship"
    if rho >= 0.2:
        return "WEAK POSITIVE — consistent with hypothesis but not strong evidence"
    if rho <= -0.2:
        return "NEGATIVE — refutes hypothesis"
    return "NO CORRELATION — hypothesis not supported"


def _print_secondary_test(joined):
    """Print Spearman correlations of purity score against both deltas."""
    print()
    _print_banner("SECONDARY TEST: Spearman rank correlation (purity score vs predicate delta)")

    scores = [j['score'] for j in joined]
    rho_pred = spearman(scores, [j['delta_preds'] for j in joined])
    rho_edge = spearman(scores, [j['delta_edges'] for j in joined])

    print()
    # spearman() returns None for fewer than two rows or zero variance; applying
    # a float format spec to None would raise TypeError, so format defensively.
    print(f" Spearman ρ (purity score vs Δpredicates): {_format_rho(rho_pred)}")
    print(f" Spearman ρ (purity score vs Δedges): {_format_rho(rho_edge)}")
    print()

    if rho_pred is not None:
        print(f" Predicate delta verdict: {_rho_verdict(rho_pred)}")
        print()


def _positive_rate_str(group):
    """Describe how many items in ``group`` have a positive predicate delta."""
    if not group:
        return "—"
    pos = sum(1 for j in group if j['delta_preds'] > 0)
    return f"{pos}/{len(group)} positive ({pos/len(group)*100:.0f}%)"


def _print_tertiary_test(joined):
    """Print per-bucket correlations and per-bucket positive rates by purity group."""
    print()
    _print_banner("TERTIARY TEST: Within-bucket correlation")

    by_bucket = defaultdict(list)
    for j in joined:
        by_bucket[j['bucket']].append(j)
    bucket_order = ['high', 'mid', 'low', 'document']

    print()
    print(f"{'Bucket':<12} {'n':<5} {'Single':<10} {'Multi':<10} {'ρ (score vs Δpred)':<22}")
    print("-" * 75)
    for bucket in bucket_order:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        n_single = sum(1 for j in items if j['binary'] == 'single')
        n_multi = sum(1 for j in items if j['binary'] == 'multi')
        if n >= 3:
            rho_b = spearman(
                [j['score'] for j in items],
                [j['delta_preds'] for j in items],
            )
            rho_str = f"{rho_b:+.3f}" if rho_b is not None else "n/a (no variance)"
        else:
            rho_str = "n/a (too few)"
        print(f"{bucket:<12} {n:<5} {n_single:<10} {n_multi:<10} {rho_str}")

    # Interaction with bucket: do single/multi outcomes differ within bucket?
    print()
    print("Per-bucket cascade-positive rate by binary purity:")
    print()
    print(f"{'Bucket':<12} {'Single':<25} {'Multi':<25}")
    print("-" * 65)
    for bucket in bucket_order:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        single_items = [j for j in items if j['binary'] == 'single']
        multi_items = [j for j in items if j['binary'] == 'multi']
        print(f"{bucket:<12} {_positive_rate_str(single_items):<25} {_positive_rate_str(multi_items):<25}")


def _print_group_means(joined):
    """Print mean deltas and the aggregate predicate change per purity group."""
    print()
    _print_banner("MEAN PREDICATE DELTA BY GROUP")
    print()
    for binary in ['single', 'multi']:
        items = [j for j in joined if j['binary'] == binary]
        if not items:
            continue
        n = len(items)
        mean_dp = sum(j['delta_preds'] for j in items) / n
        mean_de = sum(j['delta_edges'] for j in items) / n
        sum_pp = sum(j['prod_preds'] for j in items)
        sum_cp = sum(j['cascade_preds'] for j in items)
        # Report 0% rather than dividing by a zero production total.
        pct_change = (sum_cp - sum_pp) / sum_pp * 100 if sum_pp else 0
        print(f"{binary}-domain (n={n}):")
        print(f" Mean Δpredicates per source: {mean_dp:+.2f}")
        print(f" Mean Δedges per source: {mean_de:+.2f}")
        print(f" Aggregate predicate change: {sum_pp} → {sum_cp} ({pct_change:+.1f}%)")
        print()


def main():
    """Run the E1.6 analysis: join ratings with cascade outcomes and report.

    Reads RATINGS_PATH and COMPARISON_PATH, joins them by source name, prints
    the per-source table and the primary, secondary and tertiary tests plus
    per-group means to stdout, then writes the joined rows as JSON.

    Correlations that spearman() reports as undefined (``None``) are printed
    as ``n/a`` so the later sections and the JSON output are still produced.
    """
    joined = _load_joined()

    _print_banner(f"E1.6 ANALYSIS — Domain Purity vs Cascade Outcome (n={len(joined)})")
    _print_source_table(joined)
    _print_primary_test(joined)
    _print_secondary_test(joined)
    _print_tertiary_test(joined)
    _print_group_means(joined)

    # Save joined data for the experiments log writeup
    out_path = "/home/aaron/aaronai/experiments/e16_joined_analysis.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(joined, f, indent=2)
    print(f"Joined data saved to {out_path}")
|
||||
|
||||
|
||||
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user