ed2d090afc
Read-only inspection of the frame data Mistral produces in Stage 2, in service of Track 2 substrate design (Step 2.4 operation set spec). Artifacts: - New SQL view `stage2_frames_v` over `stage_3_queue.stage2_metadata` (CREATE OR REPLACE; idempotent; raw JSONB exposed alongside structured fields so worker-version drift is inspectable). - Analysis script: frequency, label-hygiene collisions, per-doc count, co-occurrence (top-K), file-type × frame cross-tab, worker-version split, data-gap accounting, corpus-wide coverage. - JSON sidecar for diff-across-runs reproducibility. - Markdown report with explicit Track 2 viability section. Headline findings: - Frames cluster meaningfully on the framed-doc subset (subject to validation on larger samples for the file-type cross-tab). - Only 56% of corpus has frame coverage. 198 conversation sources bypass Stage 2 by design (`ingest_conversations.py` writes directly to embeddings); 339 short docs (<2000 chars) skip Mistral by char-gate; 12 Stage 2 failures. - All 14 voice notes and all 39 dream outputs are in the data gap. Primary capture and self-reflection channels are silent to the frame system. Dreamer cannot frame-condition on its own output. - 54 normalized label collisions (`Professional Experience` vs `Professional_Experience`, etc.) — any router must normalize first. - "Education" is a near-universal frame (36% of frame-extracted docs); cheap 20-doc hand-inspection diagnostic in report §8 to distinguish prompt artifact from corpus shape. - File-type × frame stratification is concrete signal that ties to Improvement #2 (`embeddings.type` backfill); currently NULL for 71% of rows. No production code touched. View is droppable; script is read-only.
987 lines
14 KiB
JSON
987 lines
14 KiB
JSON
{
|
|
"generated_at": "2026-05-03T20:21:33.558462",
|
|
"n_docs_with_frames": 668,
|
|
"n_distinct_labels": 1374,
|
|
"top_30_frames": [
|
|
[
|
|
"Education",
|
|
238
|
|
],
|
|
[
|
|
"Course",
|
|
58
|
|
],
|
|
[
|
|
"Programming",
|
|
43
|
|
],
|
|
[
|
|
"Design",
|
|
32
|
|
],
|
|
[
|
|
"Professional Experience",
|
|
24
|
|
],
|
|
[
|
|
"Employment",
|
|
24
|
|
],
|
|
[
|
|
"Research",
|
|
23
|
|
],
|
|
[
|
|
"3D Printing",
|
|
22
|
|
],
|
|
[
|
|
"Project",
|
|
21
|
|
],
|
|
[
|
|
"Grading",
|
|
21
|
|
],
|
|
[
|
|
"Art",
|
|
21
|
|
],
|
|
[
|
|
"Budget",
|
|
21
|
|
],
|
|
[
|
|
"Academic Integrity",
|
|
20
|
|
],
|
|
[
|
|
"Teaching",
|
|
19
|
|
],
|
|
[
|
|
"Technology",
|
|
18
|
|
],
|
|
[
|
|
"Attendance",
|
|
17
|
|
],
|
|
[
|
|
"Application",
|
|
15
|
|
],
|
|
[
|
|
"Accommodation",
|
|
13
|
|
],
|
|
[
|
|
"Manufacturing",
|
|
13
|
|
],
|
|
[
|
|
"Coursework",
|
|
11
|
|
],
|
|
[
|
|
"Recommendation",
|
|
10
|
|
],
|
|
[
|
|
"Manufacturing Process",
|
|
10
|
|
],
|
|
[
|
|
"Additive Manufacturing",
|
|
10
|
|
],
|
|
[
|
|
"Job Application",
|
|
10
|
|
],
|
|
[
|
|
"Exhibitions",
|
|
10
|
|
],
|
|
[
|
|
"Academic Administration",
|
|
9
|
|
],
|
|
[
|
|
"Communication",
|
|
9
|
|
],
|
|
[
|
|
"Course Design",
|
|
9
|
|
],
|
|
[
|
|
"Veteran and Military Services",
|
|
9
|
|
],
|
|
[
|
|
"Career",
|
|
9
|
|
]
|
|
],
|
|
"label_collisions": {
|
|
"conversational": [
|
|
[
|
|
"Conversational",
|
|
1
|
|
],
|
|
[
|
|
"conversational",
|
|
1
|
|
]
|
|
],
|
|
"content": [
|
|
[
|
|
"Content",
|
|
1
|
|
],
|
|
[
|
|
"content",
|
|
1
|
|
]
|
|
],
|
|
"cascade": [
|
|
[
|
|
"Cascade",
|
|
1
|
|
],
|
|
[
|
|
"cascade",
|
|
1
|
|
]
|
|
],
|
|
"education": [
|
|
[
|
|
"Education",
|
|
238
|
|
],
|
|
[
|
|
"education",
|
|
1
|
|
]
|
|
],
|
|
"academic record": [
|
|
[
|
|
"Academic_Record",
|
|
1
|
|
],
|
|
[
|
|
"Academic Record",
|
|
1
|
|
]
|
|
],
|
|
"independent study": [
|
|
[
|
|
"Independent Study",
|
|
5
|
|
],
|
|
[
|
|
"Independent_Study",
|
|
2
|
|
]
|
|
],
|
|
"project management": [
|
|
[
|
|
"Project Management",
|
|
7
|
|
],
|
|
[
|
|
"Project_Management",
|
|
1
|
|
]
|
|
],
|
|
"digital fabrication": [
|
|
[
|
|
"Digital Fabrication",
|
|
6
|
|
],
|
|
[
|
|
"digital_fabrication",
|
|
1
|
|
],
|
|
[
|
|
"digital fabrication",
|
|
1
|
|
]
|
|
],
|
|
"project proposal": [
|
|
[
|
|
"Project_Proposal",
|
|
2
|
|
],
|
|
[
|
|
"Project Proposal",
|
|
2
|
|
]
|
|
],
|
|
"academic integrity": [
|
|
[
|
|
"Academic Integrity",
|
|
20
|
|
],
|
|
[
|
|
"Academic_Integrity",
|
|
2
|
|
]
|
|
],
|
|
"3d printing": [
|
|
[
|
|
"3D Printing",
|
|
22
|
|
],
|
|
[
|
|
"3D_Printing",
|
|
7
|
|
]
|
|
],
|
|
"technical skills": [
|
|
[
|
|
"Technical Skills",
|
|
2
|
|
],
|
|
[
|
|
"Technical_Skills",
|
|
1
|
|
]
|
|
],
|
|
"course structure": [
|
|
[
|
|
"Course Structure",
|
|
7
|
|
],
|
|
[
|
|
"Course_Structure",
|
|
1
|
|
]
|
|
],
|
|
"course design": [
|
|
[
|
|
"Course Design",
|
|
9
|
|
],
|
|
[
|
|
"Course_Design",
|
|
1
|
|
]
|
|
],
|
|
"product design": [
|
|
[
|
|
"Product Design",
|
|
6
|
|
],
|
|
[
|
|
"Product_Design",
|
|
1
|
|
]
|
|
],
|
|
"professional experience": [
|
|
[
|
|
"Professional Experience",
|
|
24
|
|
],
|
|
[
|
|
"Professional_Experience",
|
|
6
|
|
]
|
|
],
|
|
"disability accommodations": [
|
|
[
|
|
"Disability Accommodations",
|
|
4
|
|
],
|
|
[
|
|
"Disability_Accommodations",
|
|
1
|
|
]
|
|
],
|
|
"material science": [
|
|
[
|
|
"Material_Science",
|
|
2
|
|
],
|
|
[
|
|
"Material Science",
|
|
4
|
|
]
|
|
],
|
|
"computational design": [
|
|
[
|
|
"Computational Design",
|
|
7
|
|
],
|
|
[
|
|
"Computational_Design",
|
|
1
|
|
]
|
|
],
|
|
"computer services policy": [
|
|
[
|
|
"Computer Services Policy",
|
|
6
|
|
],
|
|
[
|
|
"Computer_Services_Policy",
|
|
1
|
|
]
|
|
],
|
|
"work experience": [
|
|
[
|
|
"Work_Experience",
|
|
1
|
|
],
|
|
[
|
|
"Work Experience",
|
|
3
|
|
]
|
|
],
|
|
"academic program": [
|
|
[
|
|
"Academic Program",
|
|
7
|
|
],
|
|
[
|
|
"Academic_Program",
|
|
1
|
|
]
|
|
],
|
|
"project-based learning": [
|
|
[
|
|
"Project-Based Learning",
|
|
5
|
|
],
|
|
[
|
|
"Project-Based_Learning",
|
|
1
|
|
],
|
|
[
|
|
"Project-based Learning",
|
|
2
|
|
]
|
|
],
|
|
"art and design": [
|
|
[
|
|
"Art and Design",
|
|
6
|
|
],
|
|
[
|
|
"Art_and_Design",
|
|
1
|
|
]
|
|
],
|
|
"fdm technology": [
|
|
[
|
|
"FDM_Technology",
|
|
2
|
|
],
|
|
[
|
|
"FDM Technology",
|
|
1
|
|
]
|
|
],
|
|
"material selection": [
|
|
[
|
|
"Material_Selection",
|
|
1
|
|
],
|
|
[
|
|
"Material Selection",
|
|
1
|
|
]
|
|
],
|
|
"product development": [
|
|
[
|
|
"Product Development",
|
|
6
|
|
],
|
|
[
|
|
"Product_Development",
|
|
2
|
|
]
|
|
],
|
|
"market research": [
|
|
[
|
|
"Market_Research",
|
|
1
|
|
],
|
|
[
|
|
"Market Research",
|
|
2
|
|
]
|
|
],
|
|
"computer services": [
|
|
[
|
|
"Computer Services",
|
|
2
|
|
],
|
|
[
|
|
"Computer_Services",
|
|
1
|
|
]
|
|
],
|
|
"student evaluation of instruction": [
|
|
[
|
|
"Student Evaluation of Instruction",
|
|
1
|
|
],
|
|
[
|
|
"Student_Evaluation_of_Instruction",
|
|
1
|
|
]
|
|
],
|
|
"course management": [
|
|
[
|
|
"Course_Management",
|
|
1
|
|
],
|
|
[
|
|
"Course Management",
|
|
1
|
|
]
|
|
],
|
|
"grade policy": [
|
|
[
|
|
"Grade_Policy",
|
|
1
|
|
],
|
|
[
|
|
"Grade Policy",
|
|
1
|
|
]
|
|
],
|
|
"academic transcript": [
|
|
[
|
|
"Academic_Transcript",
|
|
1
|
|
],
|
|
[
|
|
"Academic Transcript",
|
|
1
|
|
]
|
|
],
|
|
"evaluation criteria": [
|
|
[
|
|
"Evaluation Criteria",
|
|
1
|
|
],
|
|
[
|
|
"Evaluation_Criteria",
|
|
1
|
|
]
|
|
],
|
|
"computer science": [
|
|
[
|
|
"Computer Science",
|
|
2
|
|
],
|
|
[
|
|
"Computer_Science",
|
|
1
|
|
]
|
|
],
|
|
"electrical circuit": [
|
|
[
|
|
"Electrical Circuit",
|
|
2
|
|
],
|
|
[
|
|
"Electrical_Circuit",
|
|
1
|
|
]
|
|
],
|
|
"digital logic": [
|
|
[
|
|
"Digital Logic",
|
|
1
|
|
],
|
|
[
|
|
"Digital_Logic",
|
|
1
|
|
]
|
|
],
|
|
"course description": [
|
|
[
|
|
"Course Description",
|
|
3
|
|
],
|
|
[
|
|
"Course_Description",
|
|
1
|
|
]
|
|
],
|
|
"organizational structure": [
|
|
[
|
|
"Organizational_Structure",
|
|
1
|
|
],
|
|
[
|
|
"Organizational Structure",
|
|
1
|
|
]
|
|
],
|
|
"digital design": [
|
|
[
|
|
"Digital_Design",
|
|
1
|
|
],
|
|
[
|
|
"Digital Design",
|
|
4
|
|
]
|
|
],
|
|
"contact information": [
|
|
[
|
|
"Contact Information",
|
|
2
|
|
],
|
|
[
|
|
"Contact_Information",
|
|
1
|
|
]
|
|
],
|
|
"professional career": [
|
|
[
|
|
"Professional_Career",
|
|
2
|
|
],
|
|
[
|
|
"Professional Career",
|
|
1
|
|
]
|
|
],
|
|
"personal projects": [
|
|
[
|
|
"Personal_Projects",
|
|
1
|
|
],
|
|
[
|
|
"Personal Projects",
|
|
2
|
|
]
|
|
],
|
|
"ai development": [
|
|
[
|
|
"AI_Development",
|
|
1
|
|
],
|
|
[
|
|
"AI Development",
|
|
1
|
|
]
|
|
],
|
|
"university service": [
|
|
[
|
|
"University Service",
|
|
2
|
|
],
|
|
[
|
|
"University_Service",
|
|
1
|
|
]
|
|
],
|
|
"professional exhibitions and publications": [
|
|
[
|
|
"Professional Exhibitions and Publications",
|
|
1
|
|
],
|
|
[
|
|
"Professional_Exhibitions_and_Publications",
|
|
1
|
|
]
|
|
],
|
|
"selected external consulting and design work": [
|
|
[
|
|
"Selected External Consulting and Design Work",
|
|
1
|
|
],
|
|
[
|
|
"Selected_External_Consulting_and_Design_Work",
|
|
2
|
|
]
|
|
],
|
|
"academic career": [
|
|
[
|
|
"Academic_Career",
|
|
1
|
|
],
|
|
[
|
|
"Academic Career",
|
|
2
|
|
]
|
|
],
|
|
"technology integration": [
|
|
[
|
|
"Technology Integration",
|
|
2
|
|
],
|
|
[
|
|
"Technology_Integration",
|
|
1
|
|
]
|
|
],
|
|
"artistic practice": [
|
|
[
|
|
"Artistic_Practice",
|
|
1
|
|
],
|
|
[
|
|
"Artistic Practice",
|
|
1
|
|
]
|
|
],
|
|
"multi-material 3d printing": [
|
|
[
|
|
"Multi-Material 3D Printing",
|
|
1
|
|
],
|
|
[
|
|
"Multi-material 3D Printing",
|
|
1
|
|
]
|
|
],
|
|
"community engagement": [
|
|
[
|
|
"Community Engagement",
|
|
3
|
|
],
|
|
[
|
|
"Community_Engagement",
|
|
1
|
|
]
|
|
],
|
|
"digitaldesignandfabrication": [
|
|
[
|
|
"DigitalDesignAndFabrication",
|
|
1
|
|
],
|
|
[
|
|
"DigitalDesignandFabrication",
|
|
1
|
|
]
|
|
],
|
|
"professional background": [
|
|
[
|
|
"Professional Background",
|
|
3
|
|
],
|
|
[
|
|
"Professional_Background",
|
|
1
|
|
]
|
|
]
|
|
},
|
|
"per_doc_frame_count": {
|
|
"3": 282,
|
|
"5": 67,
|
|
"4": 195,
|
|
"2": 57,
|
|
"7": 13,
|
|
"11": 5,
|
|
"13": 2,
|
|
"15": 1,
|
|
"12": 4,
|
|
"6": 21,
|
|
"8": 8,
|
|
"10": 4,
|
|
"9": 6,
|
|
"30": 1,
|
|
"14": 1,
|
|
"18": 1
|
|
},
|
|
"top_30_pairs": [
|
|
{
|
|
"a": "Course",
|
|
"b": "Education",
|
|
"count": 46
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Project",
|
|
"count": 20
|
|
},
|
|
{
|
|
"a": "Design",
|
|
"b": "Education",
|
|
"count": 20
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Professional Experience",
|
|
"count": 20
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Employment",
|
|
"count": 20
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Technology",
|
|
"count": 18
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Grading",
|
|
"count": 17
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Research",
|
|
"count": 15
|
|
},
|
|
{
|
|
"a": "Art",
|
|
"b": "Education",
|
|
"count": 15
|
|
},
|
|
{
|
|
"a": "Attendance",
|
|
"b": "Grading",
|
|
"count": 14
|
|
},
|
|
{
|
|
"a": "Course",
|
|
"b": "Grading",
|
|
"count": 13
|
|
},
|
|
{
|
|
"a": "Academic Integrity",
|
|
"b": "Education",
|
|
"count": 11
|
|
},
|
|
{
|
|
"a": "Attendance",
|
|
"b": "Education",
|
|
"count": 11
|
|
},
|
|
{
|
|
"a": "Attendance",
|
|
"b": "Course",
|
|
"count": 11
|
|
},
|
|
{
|
|
"a": "Application",
|
|
"b": "Employment",
|
|
"count": 11
|
|
},
|
|
{
|
|
"a": "Coursework",
|
|
"b": "Education",
|
|
"count": 10
|
|
},
|
|
{
|
|
"a": "Course",
|
|
"b": "Design",
|
|
"count": 10
|
|
},
|
|
{
|
|
"a": "Course",
|
|
"b": "Programming",
|
|
"count": 10
|
|
},
|
|
{
|
|
"a": "Application",
|
|
"b": "Education",
|
|
"count": 10
|
|
},
|
|
{
|
|
"a": "Budget",
|
|
"b": "Education",
|
|
"count": 10
|
|
},
|
|
{
|
|
"a": "Academic Integrity",
|
|
"b": "Accommodation",
|
|
"count": 9
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Teaching",
|
|
"count": 9
|
|
},
|
|
{
|
|
"a": "Education",
|
|
"b": "Programming",
|
|
"count": 9
|
|
},
|
|
{
|
|
"a": "Academic Integrity",
|
|
"b": "Attendance",
|
|
"count": 9
|
|
},
|
|
{
|
|
"a": "Course",
|
|
"b": "Project",
|
|
"count": 8
|
|
},
|
|
{
|
|
"a": "Research",
|
|
"b": "Teaching",
|
|
"count": 8
|
|
},
|
|
{
|
|
"a": "Grading",
|
|
"b": "Project",
|
|
"count": 7
|
|
},
|
|
{
|
|
"a": "Art",
|
|
"b": "Technology",
|
|
"count": 7
|
|
},
|
|
{
|
|
"a": "Academic Integrity",
|
|
"b": "Course",
|
|
"count": 7
|
|
},
|
|
{
|
|
"a": "Accommodation",
|
|
"b": "Course",
|
|
"count": 7
|
|
}
|
|
],
|
|
"folder_crosstab": {
|
|
"Education": {
|
|
"pdf": 116,
|
|
"docx": 119,
|
|
"pptx": 3
|
|
},
|
|
"Course": {
|
|
"pdf": 29,
|
|
"docx": 29
|
|
},
|
|
"Programming": {
|
|
"pptx": 15,
|
|
"docx": 10,
|
|
"pdf": 12,
|
|
"txt": 6
|
|
},
|
|
"Design": {
|
|
"pdf": 13,
|
|
"docx": 16,
|
|
"pptx": 3
|
|
},
|
|
"Professional Experience": {
|
|
"docx": 13,
|
|
"pdf": 11
|
|
},
|
|
"Employment": {
|
|
"pdf": 15,
|
|
"docx": 9
|
|
},
|
|
"Research": {
|
|
"pdf": 9,
|
|
"docx": 13,
|
|
"markdown": 1
|
|
},
|
|
"3D Printing": {
|
|
"docx": 3,
|
|
"pdf": 11,
|
|
"pptx": 8
|
|
},
|
|
"Project": {
|
|
"pdf": 8,
|
|
"docx": 12,
|
|
"markdown": 1
|
|
},
|
|
"Grading": {
|
|
"pdf": 10,
|
|
"docx": 11
|
|
},
|
|
"Art": {
|
|
"docx": 11,
|
|
"pdf": 9,
|
|
"pptx": 1
|
|
},
|
|
"Budget": {
|
|
"docx": 6,
|
|
"pdf": 15
|
|
},
|
|
"Academic Integrity": {
|
|
"docx": 17,
|
|
"pdf": 3
|
|
},
|
|
"Teaching": {
|
|
"pdf": 9,
|
|
"docx": 10
|
|
},
|
|
"Technology": {
|
|
"docx": 15,
|
|
"pdf": 3
|
|
},
|
|
"Attendance": {
|
|
"docx": 11,
|
|
"pdf": 6
|
|
},
|
|
"Application": {
|
|
"pdf": 13,
|
|
"docx": 2
|
|
},
|
|
"Accommodation": {
|
|
"docx": 11,
|
|
"pdf": 2
|
|
},
|
|
"Manufacturing": {
|
|
"docx": 6,
|
|
"pptx": 4,
|
|
"pdf": 3
|
|
},
|
|
"Coursework": {
|
|
"pdf": 8,
|
|
"docx": 3
|
|
}
|
|
},
|
|
"bin_totals": {
|
|
"markdown": 64,
|
|
"pdf": 286,
|
|
"pptx": 70,
|
|
"txt": 28,
|
|
"docx": 217,
|
|
"dream_output": 3
|
|
},
|
|
"worker_versions": {
|
|
"2.0": 3,
|
|
"2.1": 665
|
|
},
|
|
"data_gap": {
|
|
"count": 339,
|
|
"by_type_bin": {
|
|
"pdf": 110,
|
|
"voice_note": 14,
|
|
"docx": 110,
|
|
"dream_output": 39,
|
|
"pptx": 31,
|
|
"txt": 28,
|
|
"markdown": 7
|
|
},
|
|
"char_length": {
|
|
"min": 6,
|
|
"max": 1998,
|
|
"median": 1077
|
|
},
|
|
"sample_sources": [
|
|
"Thesis Paper Guidlines.pdf",
|
|
"2026-04-30-17-06-voice.md",
|
|
"2026-04-30-15-59-voice.md",
|
|
"2026-04-30-16-53-voice.md",
|
|
"2026-04-30-16-23-voice.md",
|
|
"2026-04-29-17-52-voice.md",
|
|
"2026-04-30-16-59-voice.md",
|
|
"Outline for 3D Printed Materials for Foundry Casting.docx",
|
|
"2026-04-26-22-52-voice.md",
|
|
"2026-04-30-synthesis.md"
|
|
]
|
|
},
|
|
"corpus_coverage": {
|
|
"total_distinct_sources_in_embeddings": 1255,
|
|
"conversations_no_frames_by_design": 198,
|
|
"files_with_frames": 704,
|
|
"files_short_no_frames": 339,
|
|
"files_stage2_failed": 12,
|
|
"frame_coverage_pct": 56.1
|
|
}
|
|
} |