#!/usr/bin/env python3
"""
BirdAI Briefing Generator Test
===============================

Tests the local LLM as a document briefing generator. The local model
produces a structured roadmap for the API — cleaning, structure detection,
signal flagging — without semantic judgment.

Results written to ~/aaronai/briefing_test_results.json
"""

import json
import os
import urllib.request
import urllib.error
import psycopg2
import psycopg2.extras
import hashlib
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv

load_dotenv(os.path.expanduser("~/aaronai/.env"))

PG_DSN = os.getenv("PG_DSN")
RESULTS_FILE = os.path.expanduser("~/aaronai/briefing_test_results.json")
MODEL = "mistral"
SAMPLE_SIZE = 50
OLLAMA_URL = "http://localhost:11434/api/generate"

# Closed vocabularies the model's answers are clamped to (see sanitize_briefing).
VALID_DOC_TYPES = {
    "academic_pdf", "technical_doc", "chat_log", "code", "presentation",
    "book_excerpt", "form", "syllabus", "email", "notes", "unknown"
}
VALID_DENSITIES = {"high", "medium", "low"}
VALID_PRIORITIES = {"full", "partial", "skip"}

# Approximate API tokens of "orientation" work a briefing saves per document
# (single source of truth for the estimate in estimate_token_reduction).
ORIENTATION_TOKENS_SAVED = 200

BRIEFING_PROMPT = """Analyze this document and return a JSON briefing. No explanation, no prose, JSON only.

Return exactly this structure:
{
  "document_type": "one of: academic_pdf, technical_doc, chat_log, code, presentation, book_excerpt, form, syllabus, email, notes, unknown",
  "primary_language": "language code e.g. en, fr, de",
  "density": "one of: high, medium, low",
  "has_proper_nouns": true or false,
  "has_dates": true or false,
  "has_numeric_data": true or false,
  "has_institutional_language": true or false,
  "has_technical_terms": true or false,
  "likely_has_named_entities": true or false,
  "structure_signals": [],
  "noise_signals": [],
  "extraction_priority": "one of: full, partial, skip"
}

Rules:
- document_type: identify from formatting patterns and vocabulary, not meaning
- density: high=information dense technical or academic text, medium=mixed, low=narrative/literary/sparse
- has_proper_nouns: true if you see capitalized words that are not sentence starts
- has_dates: true if you see date patterns (numbers with months, years, slashes)
- has_numeric_data: true if you see measurements, percentages, statistics
- has_institutional_language: true if you see words like university, department, policy, committee, grant
- has_technical_terms: true if you see domain-specific jargon or acronyms
- likely_has_named_entities: true if has_proper_nouns is true
- structure_signals: list any structural markers you see e.g. ["headings", "bullet_lists", "numbered_lists", "tables", "code_blocks", "citations"]
- noise_signals: list any noise patterns you see e.g. ["repeated_headers", "page_numbers", "formatting_artifacts", "boilerplate"]
- extraction_priority: full if density=high and likely_has_named_entities=true, skip if density=low and likely_has_named_entities=false, partial otherwise

Document:
"""


def get_sample_documents():
    """Fetch up to SAMPLE_SIZE random documents, one per distinct source.

    Returns a list of RealDictRow mappings with keys ``id``, ``document``,
    ``source``, ``created_at``. Only documents of 100–3000 chars qualify.

    Raises:
        RuntimeError: if PG_DSN is not configured.
    """
    if not PG_DSN:
        raise RuntimeError("PG_DSN not found in .env — cannot connect to database")
    conn = psycopg2.connect(PG_DSN)
    try:
        # Cursor as context manager + finally-close: no leaked connection
        # even if the query raises.
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("""
                SELECT DISTINCT ON (source) id, document, source, created_at
                FROM embeddings
                WHERE length(document) > 100 AND length(document) < 3000
                ORDER BY source, random()
                LIMIT %s
            """, (SAMPLE_SIZE,))
            docs = cur.fetchall()
    finally:
        conn.close()
    return docs


def run_briefing(text):
    """Ask the local Ollama model for a JSON briefing of *text*.

    Only the first 1500 characters of *text* are sent.

    Returns:
        (parsed_dict, raw_response) on success, or (None, error_string)
        describing the failure. Never raises.
    """
    prompt = BRIEFING_PROMPT + text[:1500]
    payload = json.dumps({
        "model": MODEL,
        "prompt": prompt,
        "stream": False
    }).encode()
    raw = ""
    try:
        req = urllib.request.Request(
            OLLAMA_URL,
            data=payload,
            headers={"Content-Type": "application/json"}
        )
        with urllib.request.urlopen(req, timeout=180) as resp:
            result = json.loads(resp.read().decode())
        raw = result.get("response", "").strip()
        # Models sometimes wrap the JSON in prose; extract the outermost
        # {...} span instead of parsing the whole reply.
        start = raw.find("{")
        end = raw.rfind("}") + 1
        if start == -1 or end == 0:
            return None, f"NO_JSON: {raw[:200]}"
        json_str = raw[start:end]
        parsed = json.loads(json_str)
        if not isinstance(parsed, dict):
            return None, f"NOT_DICT: {raw[:100]}"
        return parsed, raw
    except urllib.error.URLError as e:
        return None, f"URL_ERROR: {e}"
    except TimeoutError:
        return None, "TIMEOUT"
    except json.JSONDecodeError as e:
        return None, f"JSON_ERROR: {e} | raw: {raw[:200]}"
    except Exception as e:
        # Last-resort boundary: report the failure, never crash the run.
        return None, f"ERROR: {type(e).__name__}: {e}"


def sanitize_briefing(briefing):
    """Clamp a model-produced briefing dict to the expected schema.

    Unknown enum values fall back to safe defaults ("unknown", "medium",
    "partial"); booleans accept bool, truthy strings ("true"/"yes"/"1"),
    or anything bool() accepts; list fields accept a list or a single
    string. Returns a new dict; *briefing* is not modified.
    """
    safe = {}

    dt = str(briefing.get("document_type", "unknown")).lower().strip()
    safe["document_type"] = dt if dt in VALID_DOC_TYPES else "unknown"

    safe["primary_language"] = str(briefing.get("primary_language", "en")).lower().strip()[:10]

    density = str(briefing.get("density", "medium")).lower().strip()
    safe["density"] = density if density in VALID_DENSITIES else "medium"

    for field in ["has_proper_nouns", "has_dates", "has_numeric_data",
                  "has_institutional_language", "has_technical_terms",
                  "likely_has_named_entities"]:
        val = briefing.get(field, False)
        if isinstance(val, bool):
            safe[field] = val
        elif isinstance(val, str):
            safe[field] = val.lower() in ("true", "yes", "1")
        else:
            safe[field] = bool(val)

    for field in ["structure_signals", "noise_signals"]:
        val = briefing.get(field, [])
        if isinstance(val, list):
            safe[field] = [str(v) for v in val if v]
        elif isinstance(val, str):
            safe[field] = [val] if val else []
        else:
            safe[field] = []

    priority = str(briefing.get("extraction_priority", "partial")).lower().strip()
    safe["extraction_priority"] = priority if priority in VALID_PRIORITIES else "partial"

    return safe


def estimate_token_reduction(original_text, briefing):
    """Rough estimate of API tokens saved by acting on *briefing*.

    Uses the ~4 chars/token heuristic. A "skip" priority saves the whole
    document plus the orientation overhead; otherwise each flagged noise
    signal is assumed to strip ~5% of the text (capped at 40%).
    """
    original_tokens = max(len(original_text) / 4, 1)

    if briefing.get("extraction_priority") == "skip":
        return {
            "original_tokens_approx": round(original_tokens),
            "orientation_tokens_saved": round(original_tokens + ORIENTATION_TOKENS_SAVED),
            "noise_reduction_pct": 100.0,
            "total_reduction_pct": 100.0,
            "note": "skip — no API call"
        }

    noise_count = len(briefing.get("noise_signals", []))
    noise_reduction_pct = min(noise_count * 0.05, 0.40)
    noise_tokens_saved = original_tokens * noise_reduction_pct
    total_saved = ORIENTATION_TOKENS_SAVED + noise_tokens_saved
    total_cost = original_tokens + ORIENTATION_TOKENS_SAVED
    # Capped at 99% so a non-skip document never reports total elimination.
    reduction_pct = min((total_saved / total_cost) * 100, 99.0)

    return {
        "original_tokens_approx": round(original_tokens),
        "orientation_tokens_saved": ORIENTATION_TOKENS_SAVED,
        "noise_tokens_saved": round(noise_tokens_saved),
        "noise_reduction_pct": round(noise_reduction_pct * 100, 1),
        "total_reduction_pct": round(reduction_pct, 1)
    }


def format_eta(elapsed_times, completed, total):
    """Format an ETA string from the per-document timings collected so far.

    Returns "ETA: --:--" before the first document finishes.
    """
    if completed == 0:
        return "ETA: --:--"
    avg = sum(elapsed_times) / completed
    remaining = (total - completed) * avg
    eta = timedelta(seconds=int(remaining))
    return f"ETA: {str(eta)}"


def content_hash(text):
    """Short (8 hex chars) MD5 fingerprint of *text* for result correlation."""
    return hashlib.md5(text.encode()).hexdigest()[:8]


def main():
    """Run the briefing test over a sample of documents and write results.

    Results are flushed to RESULTS_FILE after every document so a partial
    run still leaves usable data on disk.
    """
    test_start = time.time()
    print(f"\nBirdAI Briefing Generator Test")
    print(f"Model: {MODEL} | Sample: {SAMPLE_SIZE} docs (distinct sources)")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Results: {RESULTS_FILE}")
    print("-" * 75)

    docs = get_sample_documents()
    print(f"Loaded {len(docs)} distinct source documents from pgvector\n")
    if not docs:
        # Guard: the summary math divides by len(docs); bail out cleanly
        # instead of raising ZeroDivisionError on an empty sample.
        print("No documents matched the sampling criteria — nothing to test.")
        return

    results = {
        "meta": {
            "model": MODEL,
            "sample_size": len(docs),
            "started": datetime.now().isoformat(),
            "completed": None,
            "total_elapsed_seconds": None,
            "avg_seconds_per_doc": None
        },
        "documents": [],
        "summary": {}
    }

    success_count = 0
    failed_count = 0
    priority_counts = {"full": 0, "partial": 0, "skip": 0}
    total_reduction_pct = 0.0
    elapsed_times = []

    for i, doc in enumerate(docs):
        doc_id = doc["id"]
        content = doc["document"]
        source = doc.get("source", "unknown")
        chash = content_hash(content)

        eta_str = format_eta(elapsed_times, i, len(docs))
        print(f"[{i+1:02d}/{len(docs)}] {source[:38]:<38} {eta_str:<14}", end=" ", flush=True)

        t_start = time.time()
        briefing, raw = run_briefing(content)
        elapsed = round(time.time() - t_start, 1)
        elapsed_times.append(elapsed)

        if briefing is None:
            failed_count += 1
            print(f"→ FAILED {elapsed}s | {raw[:50]}")
            results["documents"].append({
                "id": doc_id,
                "source": source,
                "content_hash": chash,
                "content_length": len(content),
                "status": "FAILED",
                "error": raw,
                "elapsed_seconds": elapsed
            })
        else:
            briefing = sanitize_briefing(briefing)
            success_count += 1
            priority = briefing["extraction_priority"]
            doc_type = briefing["document_type"]
            density = briefing["density"]
            priority_counts[priority] = priority_counts.get(priority, 0) + 1
            reduction = estimate_token_reduction(content, briefing)
            total_reduction_pct += reduction["total_reduction_pct"]
            print(f"→ {priority.upper():<7} {doc_type:<15} density:{density:<6} -{reduction['total_reduction_pct']:>5.1f}% {elapsed}s")
            results["documents"].append({
                "id": doc_id,
                "source": source,
                "content_hash": chash,
                "content_length": len(content),
                "status": "SUCCESS",
                "elapsed_seconds": elapsed,
                "briefing": briefing,
                "token_reduction_estimate": reduction
            })

        # Incremental flush: keep partial results if the run is interrupted.
        with open(RESULTS_FILE, "w") as f:
            json.dump(results, f, indent=2, default=str)

    total_elapsed = round(time.time() - test_start, 1)
    avg_per_doc = round(total_elapsed / len(docs), 1) if docs else 0
    completed_at = datetime.now().isoformat()

    results["meta"]["completed"] = completed_at
    results["meta"]["total_elapsed_seconds"] = total_elapsed
    results["meta"]["avg_seconds_per_doc"] = avg_per_doc

    total = len(docs)
    avg_reduction = round(total_reduction_pct / success_count, 1) if success_count else 0
    summary = {
        "total": total,
        "success": success_count,
        "failed": failed_count,
        "success_rate": round(success_count / total * 100, 1),
        "extraction_priority_breakdown": priority_counts,
        "avg_token_reduction_pct": avg_reduction,
        "total_elapsed_seconds": total_elapsed,
        "avg_seconds_per_doc": avg_per_doc,
        "projected_50_doc_minutes": round((avg_per_doc * 50) / 60, 1),
        "approach_viable": success_count / total >= 0.8
    }
    results["summary"] = summary

    with open(RESULTS_FILE, "w") as f:
        json.dump(results, f, indent=2, default=str)

    print("\n" + "=" * 75)
    print(f"RESULTS")
    print(f"  Success rate:        {success_count}/{total} ({summary['success_rate']}%)")
    print(f"  Failed:              {failed_count}")
    print(f"  Priority — full:     {priority_counts.get('full', 0)}")
    print(f"  Priority — partial:  {priority_counts.get('partial', 0)}")
    print(f"  Priority — skip:     {priority_counts.get('skip', 0)}")
    print(f"  Avg token reduction: {avg_reduction}%")
    print(f"  Total elapsed:       {total_elapsed}s ({round(total_elapsed/60, 1)} min)")
    print(f"  Avg per document:    {avg_per_doc}s")
    print(f"  Projected 50 docs:   {summary['projected_50_doc_minutes']} min")
    print(f"  Approach viable:     {'YES' if summary['approach_viable'] else 'NO'}")
    print(f"  Completed:           {completed_at}")
    print(f"  Full results:        {RESULTS_FILE}")
    print("=" * 75)


if __name__ == "__main__":
    main()