""" E1.6 analysis — correlate domain-purity ratings with cascade outcomes. Applies pre-registered decision rules from E1.6 protocol. """ import json from collections import defaultdict RATINGS_PATH = "/home/aaron/aaronai/experiments/e16_purity_ratings.json" COMPARISON_PATH = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json" def spearman(xs, ys): """Compute Spearman rank correlation.""" n = len(xs) if n < 2: return None # Rank the values def rank(values): sorted_idx = sorted(range(len(values)), key=lambda i: values[i]) ranks = [0] * len(values) i = 0 while i < len(values): j = i while j + 1 < len(values) and values[sorted_idx[j+1]] == values[sorted_idx[i]]: j += 1 avg_rank = (i + j) / 2 + 1 for k in range(i, j + 1): ranks[sorted_idx[k]] = avg_rank i = j + 1 return ranks rx = rank(xs) ry = rank(ys) mean_rx = sum(rx) / n mean_ry = sum(ry) / n num = sum((rx[i] - mean_rx) * (ry[i] - mean_ry) for i in range(n)) den_x = (sum((rx[i] - mean_rx) ** 2 for i in range(n))) ** 0.5 den_y = (sum((ry[i] - mean_ry) ** 2 for i in range(n))) ** 0.5 if den_x == 0 or den_y == 0: return None return num / (den_x * den_y) def main(): with open(RATINGS_PATH) as f: ratings_data = json.load(f) with open(COMPARISON_PATH) as f: comparisons = json.load(f) ratings_by_name = {r['name']: r for r in ratings_data['ratings']} comp_by_name = {c['name']: c for c in comparisons} # Join ratings with cascade outcomes joined = [] for name, rating in ratings_by_name.items(): if name in comp_by_name: comp = comp_by_name[name] joined.append({ 'name': name, 'binary': rating['binary'], 'score': rating['score'], 'note': rating.get('note'), 'bucket': comp['bucket'], 'delta_preds': comp['delta_preds'], 'delta_edges': comp['delta_edges'], 'prod_preds': comp['prod_preds'], 'cascade_preds': comp['cascade_preds'], }) print("=" * 100) print(f"E1.6 ANALYSIS — Domain Purity vs Cascade Outcome (n={len(joined)})") print("=" * 100) # Per-source detail with rating print() print(f"{'Bucket':<10} {'Source':<48} {'Domain':<8} {'Score':<6} {'Δpreds':<8} {'Δedges':<8}") print("-" * 100) for j in sorted(joined, key=lambda x: (x['binary'], -x['score'], x['bucket'], x['name'])): name_short = (j['name'][:45] + '..') if len(j['name']) > 48 else j['name'] print(f"{j['bucket']:<10} {name_short:<48} {j['binary']:<8} {j['score']:<6} {j['delta_preds']:+d} {j['delta_edges']:+d}") # PRIMARY TEST: binary purity vs cascade outcome distribution print() print("=" * 100) print("PRIMARY TEST: Binary purity vs cascade outcome distribution") print("=" * 100) def categorize_outcome(delta): if delta > 0: return 'positive' elif delta < 0: return 'negative' else: return 'flat' by_binary = defaultdict(lambda: {'positive': 0, 'flat': 0, 'negative': 0, 'total': 0}) for j in joined: outcome = categorize_outcome(j['delta_preds']) by_binary[j['binary']][outcome] += 1 by_binary[j['binary']]['total'] += 1 print() print(f"{'Group':<15} {'n':<5} {'Positive':<12} {'Flat':<10} {'Negative':<12}") print("-" * 60) for binary in ['single', 'multi']: d = by_binary[binary] n = d['total'] if n == 0: continue pos_pct = d['positive'] / n * 100 flat_pct = d['flat'] / n * 100 neg_pct = d['negative'] / n * 100 print(f"{binary+'-domain':<15} {n:<5} {d['positive']} ({pos_pct:.0f}%) {d['flat']} ({flat_pct:.0f}%) {d['negative']} ({neg_pct:.0f}%)") # Compute the gap if by_binary['single']['total'] > 0 and by_binary['multi']['total'] > 0: single_pos_rate = by_binary['single']['positive'] / by_binary['single']['total'] * 100 multi_pos_rate = by_binary['multi']['positive'] / by_binary['multi']['total'] * 100 gap = single_pos_rate - multi_pos_rate print() print(f"Cascade-positive rate gap (single - multi): {gap:+.1f} percentage points") print() # Apply pre-registered decision rule if gap >= 20: verdict = "NARROWNESS HYPOTHESIS SUPPORTED" detail = f"Single-domain content is {gap:.0f}pp more likely to gain from cascade than multi-domain." elif gap <= -20: verdict = "REVERSE OF HYPOTHESIS" detail = f"Multi-domain content unexpectedly benefits more (counter to prediction)." elif abs(gap) < 10: verdict = "HYPOTHESIS NOT SUPPORTED" detail = "Domain purity does not appear to predict cascade outcome." else: verdict = "INCONCLUSIVE" detail = f"Gap of {gap:+.0f}pp is suggestive but below the pre-registered 20pp threshold." print(f" Pre-registered decision rule: {verdict}") print(f" {detail}") # SECONDARY TEST: Spearman correlation between purity score and predicate delta print() print("=" * 100) print("SECONDARY TEST: Spearman rank correlation (purity score vs predicate delta)") print("=" * 100) scores = [j['score'] for j in joined] deltas_pred = [j['delta_preds'] for j in joined] deltas_edge = [j['delta_edges'] for j in joined] rho_pred = spearman(scores, deltas_pred) rho_edge = spearman(scores, deltas_edge) print() print(f" Spearman ρ (purity score vs Δpredicates): {rho_pred:.3f}") print(f" Spearman ρ (purity score vs Δedges): {rho_edge:.3f}") print() if rho_pred is not None: if rho_pred >= 0.4: v = "STRONG POSITIVE — narrowness hypothesis supported with monotonic relationship" elif rho_pred >= 0.2: v = "WEAK POSITIVE — consistent with hypothesis but not strong evidence" elif rho_pred <= -0.2: v = "NEGATIVE — refutes hypothesis" else: v = "NO CORRELATION — hypothesis not supported" print(f" Predicate delta verdict: {v}") print() # TERTIARY TEST: within-bucket correlation print() print("=" * 100) print("TERTIARY TEST: Within-bucket correlation") print("=" * 100) by_bucket = defaultdict(list) for j in joined: by_bucket[j['bucket']].append(j) print() print(f"{'Bucket':<12} {'n':<5} {'Single':<10} {'Multi':<10} {'ρ (score vs Δpred)':<22}") print("-" * 75) for bucket in ['high', 'mid', 'low', 'document']: items = by_bucket.get(bucket, []) if not items: continue n = len(items) n_single = sum(1 for j in items if j['binary'] == 'single') n_multi = sum(1 for j in items if j['binary'] == 'multi') if n >= 3: scores_b = [j['score'] for j in items] deltas_b = [j['delta_preds'] for j in items] rho_b = spearman(scores_b, deltas_b) rho_str = f"{rho_b:+.3f}" if rho_b is not None else "n/a (no variance)" else: rho_str = "n/a (too few)" print(f"{bucket:<12} {n:<5} {n_single:<10} {n_multi:<10} {rho_str}") # Interaction with bucket: do single/multi outcomes differ within bucket? print() print("Per-bucket cascade-positive rate by binary purity:") print() print(f"{'Bucket':<12} {'Single':<25} {'Multi':<25}") print("-" * 65) for bucket in ['high', 'mid', 'low', 'document']: items = by_bucket.get(bucket, []) if not items: continue single_items = [j for j in items if j['binary'] == 'single'] multi_items = [j for j in items if j['binary'] == 'multi'] def rate_str(group): if not group: return "—" pos = sum(1 for j in group if j['delta_preds'] > 0) return f"{pos}/{len(group)} positive ({pos/len(group)*100:.0f}%)" print(f"{bucket:<12} {rate_str(single_items):<25} {rate_str(multi_items):<25}") # MEAN DELTA by binary group print() print("=" * 100) print("MEAN PREDICATE DELTA BY GROUP") print("=" * 100) print() for binary in ['single', 'multi']: items = [j for j in joined if j['binary'] == binary] if not items: continue n = len(items) mean_dp = sum(j['delta_preds'] for j in items) / n mean_de = sum(j['delta_edges'] for j in items) / n sum_pp = sum(j['prod_preds'] for j in items) sum_cp = sum(j['cascade_preds'] for j in items) pct_change = (sum_cp - sum_pp) / sum_pp * 100 if sum_pp else 0 print(f"{binary}-domain (n={n}):") print(f" Mean Δpredicates per source: {mean_dp:+.2f}") print(f" Mean Δedges per source: {mean_de:+.2f}") print(f" Aggregate predicate change: {sum_pp} → {sum_cp} ({pct_change:+.1f}%)") print() # Save joined data for the experiments log writeup out_path = "/home/aaron/aaronai/experiments/e16_joined_analysis.json" with open(out_path, "w") as f: json.dump(joined, f, indent=2) print(f"Joined data saved to {out_path}") if __name__ == "__main__": main()