add experiment scripts and results; watcher.py latest changes

This commit is contained in:
2026-04-30 18:06:03 +00:00
parent 1cf26df450
commit f11cacd9c9
55 changed files with 23594 additions and 726 deletions
+791
View File
@@ -0,0 +1,791 @@
{
"metadata": {
"generated_at": "2026-04-28T21:10:25",
"source_validation_file": "/home/aaron/aaronai/experiments/base_class_validation_results.json",
"seed": 43,
"stratification": "type-aware within length bucket",
"type_targets": {
"small": {
"course_module": 2,
"voice_capture": 2
},
"medium": {
"course_module": 2,
"syllabus": 1,
"other": 1
},
"large": {
"course_ppt": 1,
"syllabus": 1,
"faculty_report": 1,
"conversational": 1
}
},
"bucket_counts": {
"small": 4,
"medium": 4,
"large": 4
},
"excluded_count": 10,
"warnings": [],
"purpose": "n=20 audit expansion per audit-expansion-protocol.md (type-aware amendment)"
},
"results": [
{
"source": "02_2D Geometry.docx",
"size_bucket": "small",
"doc_chars_original": 188,
"doc_chars_sent": 188,
"truncated": false,
"condition_a": {
"input_tokens": 207,
"output_tokens": 718,
"latency_s": 3.29,
"metrics": {
"n_entities": 10,
"n_edges": 21,
"predicate_diversity": 3,
"type_diversity": 3,
"avg_degree": 4.2,
"largest_component": 10,
"largest_component_pct": 100.0
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"2D geometry\", \"type\": \"geometric concept\"},\n {\"name\": \"Curves\", \"type\": \"geometric element\"},\n {\"name\": \"lines\", \"type\": \"geometric element\"},\n {\"name\": \"Poly lines\", \"type\": \"geometric element\"},\n {\"name\": \"planar\", \"type\": \"geometric property\"},\n {\"name\": \"non-planar\", \"type\": \"geometric property\"},\n {\"name\": \"open\", \"type\": \"geometric property\"},\n {\"name\": \"closed\", \"type\": \"geometric property\"},\n {\"name\": \"complex forms\", \"type\": \"geometric concept\"},\n {\"name\": \"geometries\", \"type\": \"geometric concept\"}\n ],\n \"edges\": [\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Curves\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"lines\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Poly lines\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Curves\", \"predicate\": \"are 
building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Curves\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"skipped": "local_model_failed",
"local_error": "HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)",
"local_latency_s": 180.1
},
"_type": "course_module"
},
{
"source": "03_2D Transformation, Deformation, and Editing-AARON.docx",
"size_bucket": "small",
"doc_chars_original": 541,
"doc_chars_sent": 541,
"truncated": false,
"condition_a": {
"input_tokens": 271,
"output_tokens": 587,
"latency_s": 2.9,
"metrics": {
"n_entities": 14,
"n_edges": 13,
"predicate_diversity": 9,
"type_diversity": 4,
"avg_degree": 1.86,
"largest_component": 11,
"largest_component_pct": 78.6
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation of geometry\", \"type\": \"Concept\"},\n {\"name\": \"Transformations\", \"type\": \"Category\"},\n {\"name\": \"Deformations\", \"type\": \"Category\"},\n {\"name\": \"proportions\", \"type\": \"Property\"},\n {\"name\": \"relative geometries\", \"type\": \"Property\"},\n {\"name\": \"curves\", \"type\": \"Geometric Element\"},\n {\"name\": \"objects\", \"type\": \"Geometric Element\"},\n {\"name\": \"space\", \"type\": \"Concept\"},\n {\"name\": \"angles\", \"type\": \"Property\"},\n {\"name\": \"curvature\", \"type\": \"Property\"},\n {\"name\": \"Editing Commands\", \"type\": \"Concept\"},\n {\"name\": \"geometry\", \"type\": \"Concept\"},\n {\"name\": \"sections\", \"type\": \"Geometric Element\"},\n {\"name\": \"form\", \"type\": \"Geometric Element\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"relative geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change\", \"object\": \"space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"are changes made to\", \"object\": \"geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"curvature\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"can edit\", \"object\": \"geometry\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"involves adding\", \"object\": \"sections\"},\n {\"subject\": \"Editing 
Commands\", \"predicate\": \"involves combining\", \"object\": \"sections\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"involves removing\", \"object\": \"sections\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 112.09,
"local_metadata": {
"language": "en",
"char_length": 230,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Document discusses manipulation of geometry through transformations and deformations, with methods for editing geometry"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 230,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": false,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"technical\",\n \"one_sentence_summary\": \"Document discusses manipulation of geometry through transformations and deformations, with methods for editing geometry\"\n}",
"api_input_tokens": 533,
"api_output_tokens": 691,
"api_latency_s": 3.39,
"metrics": {
"n_entities": 17,
"n_edges": 16,
"predicate_diversity": 8,
"type_diversity": 5,
"avg_degree": 1.88,
"largest_component": 13,
"largest_component_pct": 76.5
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation of geometry\", \"type\": \"concept\"},\n {\"name\": \"Transformations\", \"type\": \"category\"},\n {\"name\": \"Deformations\", \"type\": \"category\"},\n {\"name\": \"proportions\", \"type\": \"attribute\"},\n {\"name\": \"relative geometries\", \"type\": \"attribute\"},\n {\"name\": \"curves\", \"type\": \"geometric_element\"},\n {\"name\": \"objects\", \"type\": \"geometric_element\"},\n {\"name\": \"space\", \"type\": \"concept\"},\n {\"name\": \"angles\", \"type\": \"attribute\"},\n {\"name\": \"curvature\", \"type\": \"attribute\"},\n {\"name\": \"Editing Commands\", \"type\": \"concept\"},\n {\"name\": \"geometry\", \"type\": \"concept\"},\n {\"name\": \"adding\", \"type\": \"operation\"},\n {\"name\": \"combining\", \"type\": \"operation\"},\n {\"name\": \"removing\", \"type\": \"operation\"},\n {\"name\": \"sections\", \"type\": \"geometric_element\"},\n {\"name\": \"form\", \"type\": \"geometric_element\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"breaks down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"breaks down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"relative geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change\", \"object\": \"relationship to space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"are changes to\", \"object\": \"geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"curvature\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"can edit\", 
\"object\": \"geometry\"},\n {\"subject\": \"adding\", \"predicate\": \"is method for\", \"object\": \"Editing Commands\"},\n {\"subject\": \"combining\", \"predicate\": \"is method for\", \"object\": \"Editing Commands\"},\n {\"subject\": \"removing\", \"predicate\": \"is method for\", \"object\": \"Editing Commands\"},\n {\"subject\": \"adding\", \"predicate\": \"operates on\", \"object\": \"sections\"},\n {\"subject\": \"combining\", \"predicate\": \"operates on\", \"object\": \"sections\"},\n {\"subject\": \"removing\", \"predicate\": \"operates on\", \"object\": \"sections\"}\n ]\n}\n```",
"error": null
},
"_type": "course_module"
},
{
"source": "2026-04-26-23-04-voice.md",
"size_bucket": "small",
"doc_chars_original": 931,
"doc_chars_sent": 931,
"truncated": false,
"condition_a": {
"input_tokens": 397,
"output_tokens": 672,
"latency_s": 3.43,
"metrics": {
"n_entities": 13,
"n_edges": 17,
"predicate_diversity": 16,
"type_diversity": 7,
"avg_degree": 2.62,
"largest_component": 7,
"largest_component_pct": 53.8
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Capture \u2014 2026-04-26-23-04\", \"type\": \"voice recording\"},\n {\"name\": \"extended mine experiment\", \"type\": \"project\"},\n {\"name\": \"extended mine project\", \"type\": \"project\"},\n {\"name\": \"Claude AI\", \"type\": \"AI assistant\"},\n {\"name\": \"Bird\", \"type\": \"dog\"},\n {\"name\": \"Cat\", \"type\": \"person\"},\n {\"name\": \"apple orchard\", \"type\": \"location\"},\n {\"name\": \"house\", \"type\": \"location\"},\n {\"name\": \"rose\", \"type\": \"location\"},\n {\"name\": \"stick\", \"type\": \"object\"},\n {\"name\": \"teal crocodile toy\", \"type\": \"object\"},\n {\"name\": \"dishes\", \"type\": \"object\"},\n {\"name\": \"sink\", \"type\": \"location\"}\n ],\n \"edges\": [\n {\"subject\": \"Capture \u2014 2026-04-26-23-04\", \"predicate\": \"is a\", \"object\": \"voice recording\"},\n {\"subject\": \"Capture \u2014 2026-04-26-23-04\", \"predicate\": \"has modality\", \"object\": \"audio\"},\n {\"subject\": \"Capture \u2014 2026-04-26-23-04\", \"predicate\": \"has status\", \"object\": \"unprocessed\"},\n {\"subject\": \"speaker\", \"predicate\": \"worked on\", \"object\": \"extended mine project\"},\n {\"subject\": \"speaker\", \"predicate\": \"worked with\", \"object\": \"Claude AI\"},\n {\"subject\": \"speaker\", \"predicate\": \"did\", \"object\": \"dishes\"},\n {\"subject\": \"dishes\", \"predicate\": \"were in\", \"object\": \"sink\"},\n {\"subject\": \"speaker\", \"predicate\": \"took on walk\", \"object\": \"Bird\"},\n {\"subject\": \"Cat\", \"predicate\": \"accompanied on walk\", \"object\": \"Bird\"},\n {\"subject\": \"Bird\", \"predicate\": \"walked through\", \"object\": \"apple orchard\"},\n {\"subject\": \"apple orchard\", \"predicate\": \"is next to\", \"object\": \"house\"},\n {\"subject\": \"Bird\", \"predicate\": \"walked through\", \"object\": \"rose\"},\n {\"subject\": \"Bird\", \"predicate\": \"pooped in\", \"object\": \"orchard\"},\n {\"subject\": 
\"Bird\", \"predicate\": \"chased\", \"object\": \"stick\"},\n {\"subject\": \"Bird\", \"predicate\": \"has\", \"object\": \"teal crocodile toy\"},\n {\"subject\": \"Bird\", \"predicate\": \"is following\", \"object\": \"speaker\"},\n {\"subject\": \"Bird\", \"predicate\": \"is squeaking at\", \"object\": \"speaker\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 20.78,
"local_metadata": {
"language": "en",
"char_length": 382,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": false,
"has_technical_terminology": false,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "personal",
"one_sentence_summary": "A personal account of an individual's activities on a specific day, including household chores and pet care"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 382,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": true\n },\n \"content_signals\": {\n \"has_named_people\": true,\n \"has_institutional_language\": false,\n \"has_technical_terminology\": false,\n \"has_first_person\": true,\n \"has_quotations\": false\n },\n \"domain_class\": \"personal\",\n \"one_sentence_summary\": \"A personal account of an individual's activities on a specific day, including household chores and pet care\"\n}",
"api_input_tokens": 661,
"api_output_tokens": 740,
"api_latency_s": 3.6,
"metrics": {
"n_entities": 21,
"n_edges": 17,
"predicate_diversity": 17,
"type_diversity": 12,
"avg_degree": 1.62,
"largest_component": 12,
"largest_component_pct": 57.1
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"2026-04-26\", \"type\": \"date\"},\n {\"name\": \"voice recording\", \"type\": \"artifact\"},\n {\"name\": \"field recording\", \"type\": \"method\"},\n {\"name\": \"extended mine experiment\", \"type\": \"project\"},\n {\"name\": \"extended mine project\", \"type\": \"project\"},\n {\"name\": \"Claude AI\", \"type\": \"entity\"},\n {\"name\": \"dishes\", \"type\": \"object\"},\n {\"name\": \"sink\", \"type\": \"location\"},\n {\"name\": \"Bird\", \"type\": \"animal\"},\n {\"name\": \"Dog\", \"type\": \"animal\"},\n {\"name\": \"walk\", \"type\": \"activity\"},\n {\"name\": \"apple orchard\", \"type\": \"location\"},\n {\"name\": \"house\", \"type\": \"location\"},\n {\"name\": \"Cat\", \"type\": \"animal\"},\n {\"name\": \"rose\", \"type\": \"location\"},\n {\"name\": \"poop\", \"type\": \"object\"},\n {\"name\": \"stick\", \"type\": \"object\"},\n {\"name\": \"teal crocodile toy\", \"type\": \"object\"},\n {\"name\": \"Sunday\", \"type\": \"day\"},\n {\"name\": \"10:30\", \"type\": \"time\"},\n {\"name\": \"narrator\", \"type\": \"person\"}\n ],\n \"edges\": [\n {\"subject\": \"narrator\", \"predicate\": \"woke up at\", \"object\": \"10:30\"},\n {\"subject\": \"narrator\", \"predicate\": \"worked on\", \"object\": \"extended mine project\"},\n {\"subject\": \"narrator\", \"predicate\": \"worked with\", \"object\": \"Claude AI\"},\n {\"subject\": \"narrator\", \"predicate\": \"washed\", \"object\": \"dishes\"},\n {\"subject\": \"dishes\", \"predicate\": \"were in\", \"object\": \"sink\"},\n {\"subject\": \"narrator\", \"predicate\": \"took on walk\", \"object\": \"Bird\"},\n {\"subject\": \"narrator\", \"predicate\": \"walked with\", \"object\": \"Cat\"},\n {\"subject\": \"walk\", \"predicate\": \"occurred through\", \"object\": \"apple orchard\"},\n {\"subject\": \"apple orchard\", \"predicate\": \"is next to\", \"object\": \"house\"},\n {\"subject\": \"Bird\", \"predicate\": \"is a\", \"object\": 
\"Dog\"},\n {\"subject\": \"Bird\", \"predicate\": \"defecated in\", \"object\": \"orchard\"},\n {\"subject\": \"Bird\", \"predicate\": \"chased\", \"object\": \"stick\"},\n {\"subject\": \"Bird\", \"predicate\": \"has\", \"object\": \"teal crocodile toy\"},\n {\"subject\": \"Bird\", \"predicate\": \"is following\", \"object\": \"narrator\"},\n {\"subject\": \"Bird\", \"predicate\": \"is squeaking at\", \"object\": \"narrator\"},\n {\"subject\": \"narrator\", \"predicate\": \"walked through\", \"object\": \"rose\"},\n {\"subject\": \"day\", \"predicate\": \"is\", \"object\": \"Sunday\"}\n ]\n}\n```",
"error": null
},
"_type": "voice_capture"
},
{
"source": "2026-04-26-22-44-voice.md",
"size_bucket": "small",
"doc_chars_original": 165,
"doc_chars_sent": 165,
"truncated": false,
"condition_a": {
"input_tokens": 207,
"output_tokens": 344,
"latency_s": 1.98,
"metrics": {
"n_entities": 6,
"n_edges": 6,
"predicate_diversity": 6,
"type_diversity": 5,
"avg_degree": 2.0,
"largest_component": 6,
"largest_component_pct": 100.0
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Capture \u2014 2026-04-26-22-44\", \"type\": \"recording\"},\n {\"name\": \"voice\", \"type\": \"type\"},\n {\"name\": \"audio\", \"type\": \"modality\"},\n {\"name\": \"unprocessed\", \"type\": \"status\"},\n {\"name\": \"test field recording\", \"type\": \"recording\"},\n {\"name\": \"capture layer\", \"type\": \"system_layer\"}\n ],\n \"edges\": [\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has type\", \"object\": \"voice\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has modality\", \"object\": \"audio\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has status\", \"object\": \"unprocessed\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"is a\", \"object\": \"test field recording\"},\n {\"subject\": \"test field recording\", \"predicate\": \"exists on\", \"object\": \"capture layer\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"is the first recording of\", \"object\": \"test field recording\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 14.45,
"local_metadata": {
"language": "en",
"char_length": 137,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": false,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "creative",
"one_sentence_summary": "Unprocessed audio test field recording"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 137,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": true\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": false,\n \"has_technical_terminology\": false,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"creative\",\n \"one_sentence_summary\": \"Unprocessed audio test field recording\"\n}",
"api_input_tokens": 459,
"api_output_tokens": 369,
"api_latency_s": 2.2,
"metrics": {
"n_entities": 7,
"n_edges": 6,
"predicate_diversity": 6,
"type_diversity": 7,
"avg_degree": 1.71,
"largest_component": 7,
"largest_component_pct": 100.0
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Capture \u2014 2026-04-26-22-44\", \"type\": \"recording\"},\n {\"name\": \"voice\", \"type\": \"type\"},\n {\"name\": \"audio\", \"type\": \"modality\"},\n {\"name\": \"unprocessed\", \"type\": \"status\"},\n {\"name\": \"test field recording\", \"type\": \"recording_classification\"},\n {\"name\": \"capture layer\", \"type\": \"layer\"},\n {\"name\": \"first recording\", \"type\": \"recording_ordinal\"}\n ],\n \"edges\": [\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has type\", \"object\": \"voice\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has modality\", \"object\": \"audio\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"has status\", \"object\": \"unprocessed\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"is classified as\", \"object\": \"test field recording\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"exists on\", \"object\": \"capture layer\"},\n {\"subject\": \"Capture \u2014 2026-04-26-22-44\", \"predicate\": \"is\", \"object\": \"first recording\"}\n ]\n}\n```",
"error": null
},
"_type": "voice_capture"
},
{
"source": "07_Cube Assignment_2018f.docx",
"size_bucket": "medium",
"doc_chars_original": 1316,
"doc_chars_sent": 1316,
"truncated": false,
"condition_a": {
"input_tokens": 463,
"output_tokens": 902,
"latency_s": 4.36,
"metrics": {
"n_entities": 23,
"n_edges": 19,
"predicate_diversity": 12,
"type_diversity": 13,
"avg_degree": 1.65,
"largest_component": 13,
"largest_component_pct": 56.5
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Cube\", \"type\": \"Object\"},\n {\"name\": \"Blue Cube\", \"type\": \"Object\"},\n {\"name\": \"Blue Layer\", \"type\": \"Layer\"},\n {\"name\": \"Measurements\", \"type\": \"Specification\"},\n {\"name\": \"1mm\", \"type\": \"Unit\"},\n {\"name\": \"0.04 inches\", \"type\": \"Unit\"},\n {\"name\": \"Initials\", \"type\": \"Element\"},\n {\"name\": \"Four view schematic\", \"type\": \"Drawing\"},\n {\"name\": \"2D drawing\", \"type\": \"Drawing\"},\n {\"name\": \"Dimensions\", \"type\": \"Specification\"},\n {\"name\": \"STL file\", \"type\": \"File Format\"},\n {\"name\": \"Center\", \"type\": \"Location\"},\n {\"name\": \"Standard layers\", \"type\": \"Setting\"},\n {\"name\": \"Infill\", \"type\": \"Setting\"},\n {\"name\": \"Raft\", \"type\": \"Setting\"},\n {\"name\": \"Supports\", \"type\": \"Setting\"},\n {\"name\": \"Rhino file\", \"type\": \"File Format\"},\n {\"name\": \"Blackboard\", \"type\": \"Platform\"},\n {\"name\": \"Colored layer\", \"type\": \"Layer\"},\n {\"name\": \"Labeled schematic\", \"type\": \"Drawing\"},\n {\"name\": \"Grade\", \"type\": \"Assessment\"},\n {\"name\": \"Letter grade\", \"type\": \"Assessment Unit\"},\n {\"name\": \"Re-submission\", \"type\": \"Process\"}\n ],\n \"edges\": [\n {\"subject\": \"Blue Cube\", \"predicate\": \"should be placed on\", \"object\": \"Blue Layer\"},\n {\"subject\": \"Cube\", \"predicate\": \"measurements should be accurate to within\", \"object\": \"1mm\"},\n {\"subject\": \"Cube\", \"predicate\": \"measurements should be accurate to within\", \"object\": \"0.04 inches\"},\n {\"subject\": \"Cube\", \"predicate\": \"should include\", \"object\": \"Initials\"},\n {\"subject\": \"Initials\", \"predicate\": \"should be\", \"object\": \"Legible\"},\n {\"subject\": \"Cube\", \"predicate\": \"requires\", \"object\": \"Four view schematic\"},\n {\"subject\": \"Four view schematic\", \"predicate\": \"is a\", \"object\": \"2D drawing\"},\n 
{\"subject\": \"2D drawing\", \"predicate\": \"should include\", \"object\": \"Dimensions\"},\n {\"subject\": \"Cube\", \"predicate\": \"should be exported as\", \"object\": \"STL file\"},\n {\"subject\": \"STL file\", \"predicate\": \"should be submitted to\", \"object\": \"Center\"},\n {\"subject\": \"Cube\", \"predicate\": \"should use settings\", \"object\": \"Standard layers\"},\n {\"subject\": \"Cube\", \"predicate\": \"should use settings\", \"object\": \"Infill\"},\n {\"subject\": \"Cube\", \"predicate\": \"should not use\", \"object\": \"Raft\"},\n {\"subject\": \"Cube\", \"predicate\": \"should not use\", \"object\": \"Supports\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should be submitted to\", \"object\": \"Blackboard\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should include\", \"object\": \"Colored layer\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should include\", \"object\": \"Labeled schematic\"},\n {\"subject\": \"Late submission\", \"predicate\": \"results in\", \"object\": \"Grade reduction by letter grade\"},\n {\"subject\": \"Re-submission\", \"predicate\": \"is not allowed for\", \"object\": \"Better grade\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 22.85,
"local_metadata": {
"language": "en",
"char_length": 230,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Instructions for creating and submitting cubes in a specific format"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 230,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"technical\",\n \"one_sentence_summary\": \"Instructions for creating and submitting cubes in a specific format\"\n}",
"api_input_tokens": 719,
"api_output_tokens": 1229,
"api_latency_s": 5.91,
"metrics": {
"n_entities": 32,
"n_edges": 27,
"predicate_diversity": 19,
"type_diversity": 16,
"avg_degree": 1.69,
"largest_component": 12,
"largest_component_pct": 37.5
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"cubes\", \"type\": \"object\"},\n {\"name\": \"measurements\", \"type\": \"attribute\"},\n {\"name\": \"1mm\", \"type\": \"measurement_value\"},\n {\"name\": \"0.04 inches\", \"type\": \"measurement_value\"},\n {\"name\": \"scale\", \"type\": \"attribute\"},\n {\"name\": \"layer\", \"type\": \"structural_element\"},\n {\"name\": \"color\", \"type\": \"attribute\"},\n {\"name\": \"Blue Cube\", \"type\": \"object\"},\n {\"name\": \"Blue Layer\", \"type\": \"structural_element\"},\n {\"name\": \"initials\", \"type\": \"content\"},\n {\"name\": \"legibility\", \"type\": \"attribute\"},\n {\"name\": \"four view schematic\", \"type\": \"document_type\"},\n {\"name\": \"2D drawing\", \"type\": \"document_type\"},\n {\"name\": \"dimensions\", \"type\": \"attribute\"},\n {\"name\": \"details\", \"type\": \"information\"},\n {\"name\": \"additional views\", \"type\": \"document_type\"},\n {\"name\": \"STL format\", \"type\": \"file_format\"},\n {\"name\": \"center\", \"type\": \"location\"},\n {\"name\": \"orientation\", \"type\": \"attribute\"},\n {\"name\": \"standard layers\", \"type\": \"setting\"},\n {\"name\": \"infill\", \"type\": \"setting\"},\n {\"name\": \"raft\", \"type\": \"setting\"},\n {\"name\": \"supports\", \"type\": \"setting\"},\n {\"name\": \"Rhino file\", \"type\": \"file_type\"},\n {\"name\": \"blackboard\", \"type\": \"platform\"},\n {\"name\": \"printed cubes\", \"type\": \"object\"},\n {\"name\": \"colored layer\", \"type\": \"structural_element\"},\n {\"name\": \"labeled schematic\", \"type\": \"document_type\"},\n {\"name\": \"submission deadline\", \"type\": \"constraint\"},\n {\"name\": \"grade\", \"type\": \"evaluation_metric\"},\n {\"name\": \"letter grade\", \"type\": \"evaluation_unit\"},\n {\"name\": \"re-submission\", \"type\": \"action\"}\n ],\n \"edges\": [\n {\"subject\": \"cubes\", \"predicate\": \"should be replicated with\", \"object\": \"measurements\"},\n {\"subject\": 
\"measurements\", \"predicate\": \"should be accurate to within\", \"object\": \"1mm\"},\n {\"subject\": \"measurements\", \"predicate\": \"should be accurate to within\", \"object\": \"0.04 inches\"},\n {\"subject\": \"cubes\", \"predicate\": \"should be to\", \"object\": \"scale\"},\n {\"subject\": \"Blue Cube\", \"predicate\": \"should be placed on\", \"object\": \"Blue Layer\"},\n {\"subject\": \"cubes\", \"predicate\": \"should include\", \"object\": \"initials\"},\n {\"subject\": \"initials\", \"predicate\": \"should be\", \"object\": \"legibility\"},\n {\"subject\": \"initials\", \"predicate\": \"should be at\", \"object\": \"appropriate scale\"},\n {\"subject\": \"cubes\", \"predicate\": \"requires\", \"object\": \"four view schematic\"},\n {\"subject\": \"four view schematic\", \"predicate\": \"is a\", \"object\": \"2D drawing\"},\n {\"subject\": \"2D drawing\", \"predicate\": \"should include\", \"object\": \"dimensions\"},\n {\"subject\": \"dimensions\", \"predicate\": \"should indicate\", \"object\": \"details\"},\n {\"subject\": \"drawing\", \"predicate\": \"should be on\", \"object\": \"colored layer\"},\n {\"subject\": \"cubes\", \"predicate\": \"may require\", \"object\": \"additional views\"},\n {\"subject\": \"finished cubes\", \"predicate\": \"should be exported as\", \"object\": \"STL format\"},\n {\"subject\": \"exported cubes\", \"predicate\": \"should be submitted to\", \"object\": \"center\"},\n {\"subject\": \"cubes\", \"predicate\": \"should be oriented in same manner as\", \"object\": \"originals\"},\n {\"subject\": \"settings\", \"predicate\": \"should include\", \"object\": \"standard layers\"},\n {\"subject\": \"settings\", \"predicate\": \"should include\", \"object\": \"infill\"},\n {\"subject\": \"settings\", \"predicate\": \"should not include\", \"object\": \"raft\"},\n {\"subject\": \"settings\", \"predicate\": \"should not include\", \"object\": \"supports\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should be submitted 
to\", \"object\": \"blackboard\"},\n {\"subject\": \"printed cubes\", \"predicate\": \"should be submitted with\", \"object\": \"Rhino file\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should include\", \"object\": \"colored layer\"},\n {\"subject\": \"Rhino file\", \"predicate\": \"should include\", \"object\": \"labeled schematic\"},\n {\"subject\": \"late submission\", \"predicate\": \"results in grade reduction of\", \"object\": \"letter grade\"},\n {\"subject\":",
"error": null
},
"_type": "course_module"
},
{
"source": "06_Gumball.docx",
"size_bucket": "medium",
"doc_chars_original": 1980,
"doc_chars_sent": 1980,
"truncated": false,
"condition_a": {
"input_tokens": 652,
"output_tokens": 1467,
"latency_s": 6.64,
"metrics": {
"n_entities": 34,
"n_edges": 30,
"predicate_diversity": 17,
"type_diversity": 16,
"avg_degree": 1.76,
"largest_component": 7,
"largest_component_pct": 20.6
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Gumball\", \"type\": \"Tool\"},\n {\"name\": \"Widget\", \"type\": \"Component\"},\n {\"name\": \"Selected object\", \"type\": \"Entity\"},\n {\"name\": \"Move transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Scale transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Rotate transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Gumball origin\", \"type\": \"Reference point\"},\n {\"name\": \"Gumball menu\", \"type\": \"Interface\"},\n {\"name\": \"White menu ball\", \"type\": \"UI element\"},\n {\"name\": \"Relocate Gumball\", \"type\": \"Menu option\"},\n {\"name\": \"Gumball center\", \"type\": \"Reference point\"},\n {\"name\": \"Reset Gumball\", \"type\": \"Menu option\"},\n {\"name\": \"Area centroid\", \"type\": \"Reference point\"},\n {\"name\": \"Align to CPlane\", \"type\": \"Menu option\"},\n {\"name\": \"Align to Object\", \"type\": \"Menu option\"},\n {\"name\": \"Align to World\", \"type\": \"Menu option\"},\n {\"name\": \"CPlane\", \"type\": \"Coordinate system\"},\n {\"name\": \"X axis\", \"type\": \"Axis\"},\n {\"name\": \"Y axis\", \"type\": \"Axis\"},\n {\"name\": \"Z axis\", \"type\": \"Axis\"},\n {\"name\": \"U curve\", \"type\": \"Object property\"},\n {\"name\": \"V curve\", \"type\": \"Object property\"},\n {\"name\": \"World grid system\", \"type\": \"Coordinate system\"},\n {\"name\": \"Snappy dragging\", \"type\": \"Dragging mode\"},\n {\"name\": \"Smooth dragging\", \"type\": \"Dragging mode\"},\n {\"name\": \"Osnaps\", \"type\": \"Feature\"},\n {\"name\": \"Drag strength\", \"type\": \"Parameter\"},\n {\"name\": \"Gumball elements\", \"type\": \"Component\"},\n {\"name\": \"Red color\", \"type\": \"Color\"},\n {\"name\": \"Green color\", \"type\": \"Color\"},\n {\"name\": \"Blue color\", \"type\": \"Color\"},\n {\"name\": \"Dotted lines\", \"type\": \"Visual element\"},\n {\"name\": \"Arrow\", \"type\": \"Visual element\"},\n {\"name\": 
\"Box\", \"type\": \"Visual element\"}\n ],\n \"edges\": [\n {\"subject\": \"Gumball\", \"predicate\": \"is a\", \"object\": \"Widget\"},\n {\"subject\": \"Gumball\", \"predicate\": \"is used to facilitate\", \"object\": \"Direct editing\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Move transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Scale transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Rotate transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"operates around\", \"object\": \"Gumball origin\"},\n {\"subject\": \"Gumball menu\", \"predicate\": \"is accessed by\", \"object\": \"White menu ball\"},\n {\"subject\": \"Relocate Gumball\", \"predicate\": \"allows redefinition of\", \"object\": \"Gumball center\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Scaling\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Translating\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Rotating\"},\n {\"subject\": \"Reset Gumball\", \"predicate\": \"places gumball at\", \"object\": \"Area centroid\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"X axis\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"Y axis\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"Z axis\"},\n {\"subject\": \"Align to Object\", \"predicate\": \"relates gumball to\", \"object\": \"U curve\"},\n {\"subject\": \"Align to Object\", \"predicate\": \"relates gumball to\", \"object\": \"V curve\"},\n {\"subject\": \"Align to World\", \"predicate\": \"relates gumball to\", \"object\": \"World grid system\"},\n {\"subject\": \"Smooth dragging\", \"predicate\": 
\"allows dragging without\", \"object\": \"Restraints\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"is equivalent to\", \"object\": \"Osnaps\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"causes object to snap to\", \"object\": \"End point\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"causes object to snap to\", \"object\": \"Mid point\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"causes object to snap to\", \"objec",
"error": null
},
"condition_b": {
"local_latency_s": 29.12,
"local_metadata": {
"language": "en",
"char_length": 450,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Instructional text about the functionality and usage of a gumball tool in a 3D modeling software"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 450,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"technical\",\n \"one_sentence_summary\": \"Instructional text about the functionality and usage of a gumball tool in a 3D modeling software\"\n}",
"api_input_tokens": 918,
"api_output_tokens": 1816,
"api_latency_s": 8.19,
"metrics": {
"n_entities": 50,
"n_edges": 35,
"predicate_diversity": 21,
"type_diversity": 25,
"avg_degree": 1.4,
"largest_component": 8,
"largest_component_pct": 16.0
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Gumball\", \"type\": \"Tool\"},\n {\"name\": \"widget\", \"type\": \"Component\"},\n {\"name\": \"selected object\", \"type\": \"Entity\"},\n {\"name\": \"direct editing\", \"type\": \"Function\"},\n {\"name\": \"move transformation\", \"type\": \"Transformation\"},\n {\"name\": \"scale transformation\", \"type\": \"Transformation\"},\n {\"name\": \"rotate transformation\", \"type\": \"Transformation\"},\n {\"name\": \"gumball origin\", \"type\": \"Reference Point\"},\n {\"name\": \"Gumball Menu\", \"type\": \"Interface\"},\n {\"name\": \"white menu ball\", \"type\": \"UI Element\"},\n {\"name\": \"Relocate Gumball\", \"type\": \"Menu Option\"},\n {\"name\": \"gumball center\", \"type\": \"Reference Point\"},\n {\"name\": \"origin point\", \"type\": \"Reference Point\"},\n {\"name\": \"scaling\", \"type\": \"Operation\"},\n {\"name\": \"translating\", \"type\": \"Operation\"},\n {\"name\": \"rotating\", \"type\": \"Operation\"},\n {\"name\": \"Reset Gumball\", \"type\": \"Menu Option\"},\n {\"name\": \"area centroid\", \"type\": \"Reference Point\"},\n {\"name\": \"object orientation\", \"type\": \"Property\"},\n {\"name\": \"Align to CPlane\", \"type\": \"Menu Option\"},\n {\"name\": \"Align to Object\", \"type\": \"Menu Option\"},\n {\"name\": \"Align to World\", \"type\": \"Menu Option\"},\n {\"name\": \"CPlane\", \"type\": \"Reference System\"},\n {\"name\": \"arrows\", \"type\": \"UI Element\"},\n {\"name\": \"x axis\", \"type\": \"Axis\"},\n {\"name\": \"y axis\", \"type\": \"Axis\"},\n {\"name\": \"z axis\", \"type\": \"Axis\"},\n {\"name\": \"U curves\", \"type\": \"Object Property\"},\n {\"name\": \"V curves\", \"type\": \"Object Property\"},\n {\"name\": \"world grid system\", \"type\": \"Reference System\"},\n {\"name\": \"Snappy Dragging\", \"type\": \"Mode\"},\n {\"name\": \"Smooth Dragging\", \"type\": \"Mode\"},\n {\"name\": \"Osnaps\", \"type\": \"Feature\"},\n {\"name\": \"snap point\", 
\"type\": \"Reference Point\"},\n {\"name\": \"end snap\", \"type\": \"Snap Type\"},\n {\"name\": \"mid snap\", \"type\": \"Snap Type\"},\n {\"name\": \"center snap\", \"type\": \"Snap Type\"},\n {\"name\": \"Drag Strength\", \"type\": \"Parameter\"},\n {\"name\": \"mouse movement\", \"type\": \"Input\"},\n {\"name\": \"1:1 ratio\", \"type\": \"Ratio\"},\n {\"name\": \"Gumball Elements\", \"type\": \"Component Group\"},\n {\"name\": \"color\", \"type\": \"Property\"},\n {\"name\": \"axis assignment\", \"type\": \"Mapping\"},\n {\"name\": \"Red\", \"type\": \"Color\"},\n {\"name\": \"Green\", \"type\": \"Color\"},\n {\"name\": \"Blue\", \"type\": \"Color\"},\n {\"name\": \"Z plane\", \"type\": \"Dimension\"},\n {\"name\": \"Z dimension\", \"type\": \"Dimension\"},\n {\"name\": \"dotted lines\", \"type\": \"UI Element\"},\n {\"name\": \"Gumball Short-Hands and Tricks\", \"type\": \"Section\"}\n ],\n \"edges\": [\n {\"subject\": \"Gumball\", \"predicate\": \"is a\", \"object\": \"widget\"},\n {\"subject\": \"Gumball\", \"predicate\": \"is used for\", \"object\": \"direct editing\"},\n {\"subject\": \"Gumball\", \"predicate\": \"operates on\", \"object\": \"selected object\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"move transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"scale transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"rotate transformation\"},\n {\"subject\": \"move transformation\", \"predicate\": \"occurs around\", \"object\": \"gumball origin\"},\n {\"subject\": \"scale transformation\", \"predicate\": \"occurs around\", \"object\": \"gumball origin\"},\n {\"subject\": \"rotate transformation\", \"predicate\": \"occurs around\", \"object\": \"gumball origin\"},\n {\"subject\": \"white menu ball\", \"predicate\": \"opens\", \"object\": \"Gumball Menu\"},\n {\"subject\": \"Relocate Gumball\", \"predicate\": \"allows redefinition of\", \"object\": 
\"gumball center\"},\n {\"subject\": \"gumball center\", \"predicate\": \"serves as\", \"object\": \"origin point\"},\n {\"subject\": \"origin point\", \"predicate\": \"is reference for\", \"object\": \"scaling\"},\n {\"subject\": \"origin point\", \"predicate\": \"is reference for\", \"object\": \"translating\"},\n {\"subject\": \"origin point\", \"predicate\": \"is reference for\", \"object\": \"rotating\"},\n {\"subject\": \"Reset Gumball\", \"predicate\": \"places gumball a",
"error": null
},
"_type": "course_module"
},
{
"source": "00_Syllabus.docx",
"size_bucket": "medium",
"doc_chars_original": 2273,
"doc_chars_sent": 2273,
"truncated": false,
"condition_a": {
"input_tokens": 607,
"output_tokens": 1199,
"latency_s": 5.55,
"metrics": {
"n_entities": 35,
"n_edges": 25,
"predicate_diversity": 15,
"type_diversity": 15,
"avg_degree": 1.43,
"largest_component": 17,
"largest_component_pct": 48.6
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"3D Computer Aided Design drawing\", \"type\": \"subject\"},\n {\"name\": \"rapid manufacturing\", \"type\": \"subject\"},\n {\"name\": \"Rhino3D\", \"type\": \"software\"},\n {\"name\": \"students\", \"type\": \"agent\"},\n {\"name\": \"virtual spaces\", \"type\": \"concept\"},\n {\"name\": \"CAD software\", \"type\": \"software\"},\n {\"name\": \"NURBS geometry\", \"type\": \"concept\"},\n {\"name\": \"3D printing\", \"type\": \"process\"},\n {\"name\": \"digital fabrication software\", \"type\": \"software\"},\n {\"name\": \"digital fabrication hardware\", \"type\": \"hardware\"},\n {\"name\": \"spatial awareness\", \"type\": \"skill\"},\n {\"name\": \"scales\", \"type\": \"concept\"},\n {\"name\": \"measuring devices\", \"type\": \"tool\"},\n {\"name\": \"physical dimensions\", \"type\": \"concept\"},\n {\"name\": \"virtual space\", \"type\": \"concept\"},\n {\"name\": \"three dimensional form\", \"type\": \"concept\"},\n {\"name\": \"digital modeling\", \"type\": \"process\"},\n {\"name\": \"three dimensional design principals\", \"type\": \"concept\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"process\"},\n {\"name\": \"printing process\", \"type\": \"process\"},\n {\"name\": \"tolerances\", \"type\": \"concept\"},\n {\"name\": \"critical thinking\", \"type\": \"skill\"},\n {\"name\": \"problem solving skills\", \"type\": \"skill\"},\n {\"name\": \"research methods\", \"type\": \"methodology\"},\n {\"name\": \"assignments\", \"type\": \"task\"},\n {\"name\": \"Rhino Level 1 Training Guide\", \"type\": \"resource\"},\n {\"name\": \"grading system\", \"type\": \"system\"},\n {\"name\": \"attendance\", \"type\": \"requirement\"},\n {\"name\": \"grade\", \"type\": \"metric\"},\n {\"name\": \"planning\", \"type\": \"skill\"},\n {\"name\": \"project completion\", \"type\": \"task\"},\n {\"name\": \"design elements\", \"type\": \"concept\"},\n {\"name\": \"design principles\", \"type\": 
\"concept\"},\n {\"name\": \"original thinking\", \"type\": \"skill\"},\n {\"name\": \"course material\", \"type\": \"content\"}\n ],\n \"edges\": [\n {\"subject\": \"course\", \"predicate\": \"introduces\", \"object\": \"3D Computer Aided Design drawing\"},\n {\"subject\": \"course\", \"predicate\": \"introduces\", \"object\": \"rapid manufacturing\"},\n {\"subject\": \"course\", \"predicate\": \"uses\", \"object\": \"Rhino3D\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"virtual spaces\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"students\", \"predicate\": \"gain hands on technical skills with\", \"object\": \"digital fabrication software\"},\n {\"subject\": \"students\", \"predicate\": \"gain hands on technical skills with\", \"object\": \"digital fabrication hardware\"},\n {\"subject\": \"students\", \"predicate\": \"demonstrate\", \"object\": \"spatial awareness\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"scales\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"measuring devices\"},\n {\"subject\": \"students\", \"predicate\": \"translate\", \"object\": \"physical dimensions into virtual space\"},\n {\"subject\": \"students\", \"predicate\": \"gain working knowledge of\", \"object\": \"three dimensional form\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"three dimensional design principals\"},\n {\"subject\": \"students\", \"predicate\": \"understand\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"students\", \"predicate\": \"understand\", \"object\": \"printing process\"},\n {\"subject\": \"students\", \"predicate\": \"understand and apply\", \"object\": \"tolerances\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"critical thinking\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"problem solving 
skills\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"research methods\"},\n {\"subject\": \"NURBS geometry\", \"predicate\": \"enables output of\", \"object\": \"3D printing\"},\n {\"subject\": \"late assignment\", \"predicate\": \"results in grade reduction of\", \"object\": \"10%\"},\n {\"subject\": \"grading system\", \"predicate\": \"evaluates\", \"object\": \"planning\"},\n {\"subject\": \"grading system\", \"predicate\": \"evaluates\", \"obj",
"error": null
},
"condition_b": {
"local_latency_s": 28.53,
"local_metadata": {
"language": "en",
"char_length": 320,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document outlines a course on 3D Computer Aided Design and rapid manufacturing using Rhino3D software."
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 320,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": true,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"educational\",\n \"one_sentence_summary\": \"This document outlines a course on 3D Computer Aided Design and rapid manufacturing using Rhino3D software.\"\n}",
"api_input_tokens": 875,
"api_output_tokens": 1493,
"api_latency_s": 6.75,
"metrics": {
"n_entities": 34,
"n_edges": 38,
"predicate_diversity": 19,
"type_diversity": 16,
"avg_degree": 2.24,
"largest_component": 3,
"largest_component_pct": 8.8
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"3D Computer Aided Design\", \"type\": \"subject\"},\n {\"name\": \"rapid manufacturing\", \"type\": \"subject\"},\n {\"name\": \"Rhino3D\", \"type\": \"software\"},\n {\"name\": \"CAD software\", \"type\": \"software_category\"},\n {\"name\": \"NURBS geometry\", \"type\": \"technical_concept\"},\n {\"name\": \"3D printing\", \"type\": \"technology\"},\n {\"name\": \"digital fabrication software\", \"type\": \"software_category\"},\n {\"name\": \"digital fabrication hardware\", \"type\": \"hardware_category\"},\n {\"name\": \"spatial awareness\", \"type\": \"skill\"},\n {\"name\": \"scales\", \"type\": \"concept\"},\n {\"name\": \"measuring devices\", \"type\": \"tool\"},\n {\"name\": \"formula\", \"type\": \"concept\"},\n {\"name\": \"physical dimensions\", \"type\": \"concept\"},\n {\"name\": \"virtual space\", \"type\": \"concept\"},\n {\"name\": \"three dimensional form\", \"type\": \"concept\"},\n {\"name\": \"digital modeling\", \"type\": \"skill\"},\n {\"name\": \"three dimensional design principals\", \"type\": \"concept\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"technology\"},\n {\"name\": \"tolerances\", \"type\": \"concept\"},\n {\"name\": \"critical thinking\", \"type\": \"skill\"},\n {\"name\": \"problem solving skills\", \"type\": \"skill\"},\n {\"name\": \"research methods\", \"type\": \"methodology\"},\n {\"name\": \"Rhino Level 1 Training Guide\", \"type\": \"learning_material\"},\n {\"name\": \"assignments\", \"type\": \"assessment\"},\n {\"name\": \"homework\", \"type\": \"assessment\"},\n {\"name\": \"grading\", \"type\": \"assessment_method\"},\n {\"name\": \"attendance\", \"type\": \"assessment_criterion\"},\n {\"name\": \"grade A\", \"type\": \"grade\"},\n {\"name\": \"grade B\", \"type\": \"grade\"},\n {\"name\": \"grade C\", \"type\": \"grade\"},\n {\"name\": \"grade D\", \"type\": \"grade\"},\n {\"name\": \"grade F\", \"type\": \"grade\"},\n {\"name\": \"project\", 
\"type\": \"deliverable\"},\n {\"name\": \"elements and principles of design\", \"type\": \"concept\"}\n ],\n \"edges\": [\n {\"subject\": \"course\", \"predicate\": \"introduces\", \"object\": \"3D Computer Aided Design\"},\n {\"subject\": \"course\", \"predicate\": \"introduces\", \"object\": \"rapid manufacturing\"},\n {\"subject\": \"course\", \"predicate\": \"uses\", \"object\": \"Rhino3D\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"virtual space\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"students\", \"predicate\": \"gain skills in\", \"object\": \"digital fabrication software\"},\n {\"subject\": \"students\", \"predicate\": \"gain skills in\", \"object\": \"digital fabrication hardware\"},\n {\"subject\": \"students\", \"predicate\": \"demonstrate\", \"object\": \"spatial awareness\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"scales\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"measuring devices\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"formula\"},\n {\"subject\": \"formula\", \"predicate\": \"translates\", \"object\": \"physical dimensions\"},\n {\"subject\": \"physical dimensions\", \"predicate\": \"converted to\", \"object\": \"virtual space\"},\n {\"subject\": \"students\", \"predicate\": \"gain knowledge of\", \"object\": \"three dimensional form\"},\n {\"subject\": \"students\", \"predicate\": \"gain knowledge of\", \"object\": \"digital modeling\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"three dimensional design principals\"},\n {\"subject\": \"students\", \"predicate\": \"understand\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"students\", \"predicate\": \"understand\", \"object\": \"printing process\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"tolerances\"},\n 
{\"subject\": \"students\", \"predicate\": \"prepare files for\", \"object\": \"output\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"critical thinking\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"problem solving skills\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"research methods\"},\n {\"subject\": \"course\", \"predicate\": \"requires\", \"object\": \"Rhino Level 1 Training Guide\"},\n {\"s",
"error": null
},
"_type": "syllabus"
},
{
"source": "1119345.pdf",
"size_bucket": "medium",
"doc_chars_original": 1745,
"doc_chars_sent": 1745,
"truncated": false,
"condition_a": {
"input_tokens": 807,
"output_tokens": 1434,
"latency_s": 6.86,
"metrics": {
"n_entities": 33,
"n_edges": 21,
"predicate_diversity": 21,
"type_diversity": 25,
"avg_degree": 1.27,
"largest_component": 15,
"largest_component_pct": 45.5
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"7665 Commerce Way Eden Prairie, MN 55344\", \"type\": \"Address\"},\n {\"name\": \"1.800.801.6491\", \"type\": \"Phone Number\"},\n {\"name\": \"http://www.stratasys.com\", \"type\": \"Website\"},\n {\"name\": \"Invoice 1119345\", \"type\": \"Invoice\"},\n {\"name\": \"01-JUN-16\", \"type\": \"Date\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Organization\"},\n {\"name\": \"Smiley Arts Building 100 1 Hawk Dr New Paltz, NY, 12561-2447\", \"type\": \"Address\"},\n {\"name\": \"Stratasys US OU\", \"type\": \"Organization\"},\n {\"name\": \"AARON NELSON\", \"type\": \"Person\"},\n {\"name\": \"1109911\", \"type\": \"Order Number\"},\n {\"name\": \"CC-AARON NELSON-9121\", \"type\": \"Purchase Order\"},\n {\"name\": \"USD\", \"type\": \"Currency\"},\n {\"name\": \"08-JUN-16\", \"type\": \"Date\"},\n {\"name\": \"8326568\", \"type\": \"Internal ID\"},\n {\"name\": \"OBJ-03327\", \"type\": \"Part Number\"},\n {\"name\": \"PACK OF 1 RGD837, VERO PUREWHITE, 3.6KG\", \"type\": \"Product\"},\n {\"name\": \"13406-03327\", \"type\": \"Serial/Lot Number\"},\n {\"name\": \"15-JUN-17\", \"type\": \"Expiration Date\"},\n {\"name\": \"521.00\", \"type\": \"Price\"},\n {\"name\": \"3888472\", \"type\": \"Delivery Number\"},\n {\"name\": \"UPS Ground\", \"type\": \"Shipping Method\"},\n {\"name\": \"accounts.receivable@stratasys.com\", \"type\": \"Email Address\"},\n {\"name\": \"Stratasys, Inc\", \"type\": \"Company\"},\n {\"name\": \"28043 Network Place Chicago, IL 60673-1280\", \"type\": \"Address\"},\n {\"name\": \"JP Morgan Chase Bank\", \"type\": \"Bank\"},\n {\"name\": \"021000021\", \"type\": \"Routing Number\"},\n {\"name\": \"601551695\", \"type\": \"Account Number\"},\n {\"name\": \"CHASUS33\", \"type\": \"SWIFT Code\"},\n {\"name\": \"GB02CHAS60924241287679\", \"type\": \"IBAN\"},\n {\"name\": \"CHASGB2L\", \"type\": \"SWIFT Code\"},\n {\"name\": 
\"41287679\", \"type\": \"Account Number\"},\n {\"name\": \"124001545\", \"type\": \"Routing Number\"}\n ],\n \"edges\": [\n {\"subject\": \"Stratasys\", \"predicate\": \"is located at\", \"object\": \"7665 Commerce Way Eden Prairie, MN 55344\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"has phone number\", \"object\": \"1.800.801.6491\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"has website\", \"object\": \"http://www.stratasys.com\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"issued on\", \"object\": \"01-JUN-16\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"billed to\", \"object\": \"SUNY New Paltz\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"shipped to\", \"object\": \"Smiley Arts Building 100 1 Hawk Dr New Paltz, NY, 12561-2447\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"issued by\", \"object\": \"Stratasys US OU\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"references purchase order\", \"object\": \"CC-AARON NELSON-9121\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"due on\", \"object\": \"08-JUN-16\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"contains line item\", \"object\": \"OBJ-03327\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"describes product\", \"object\": \"PACK OF 1 RGD837, VERO PUREWHITE, 3.6KG\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has serial lot number\", \"object\": \"13406-03327\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has expiration date\", \"object\": \"15-JUN-17\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has unit price\", \"object\": \"521.00\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"total amount\", \"object\": \"521.00\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"shipped via\", \"object\": \"UPS Ground\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"has delivery number\", \"object\": \"3888472\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"shipped on\", 
\"object\": \"01-JUN-16\"},\n {\"subject\": \"AARON NELSON\", \"predicate\": \"associated with\", \"object\": \"CC-AARON NELSON-9121\"},\n {\"subject\": \"Stratasys, Inc\", \"predicate\": \"located at\", \"object\": \"28043 Network Place Chicago, IL 60673-1280\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"accepts payment via\", \"object\": \"JP Morgan Chase Bank\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 39.42,
"local_metadata": {
"language": "en",
"char_length": 780,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "administrative",
"one_sentence_summary": "Invoice for a purchase order from Stratasys US OU to SUNY New Paltz for a product named OBJ-03327"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 780,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": false,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": true\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"administrative\",\n \"one_sentence_summary\": \"Invoice for a purchase order from Stratasys US OU to SUNY New Paltz for a product named OBJ-03327\"\n}",
"api_input_tokens": 1082,
"api_output_tokens": 1626,
"api_latency_s": 7.53,
"metrics": {
"n_entities": 29,
"n_edges": 31,
"predicate_diversity": 22,
"type_diversity": 17,
"avg_degree": 2.14,
"largest_component": 20,
"largest_component_pct": 69.0
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys US OU\", \"type\": \"Organization\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Organization\"},\n {\"name\": \"Invoice 1119345\", \"type\": \"Document\"},\n {\"name\": \"Purchase Order 1109911\", \"type\": \"Document\"},\n {\"name\": \"OBJ-03327\", \"type\": \"Product\"},\n {\"name\": \"PACK OF 1 RGD837, VERO PUREWHITE, 3.6KG\", \"type\": \"Product Description\"},\n {\"name\": \"Smiley Arts Building 100\", \"type\": \"Location\"},\n {\"name\": \"1 Hawk Dr\", \"type\": \"Address\"},\n {\"name\": \"New Paltz, NY, 12561-2447\", \"type\": \"Location\"},\n {\"name\": \"United States\", \"type\": \"Country\"},\n {\"name\": \"7665 Commerce Way\", \"type\": \"Address\"},\n {\"name\": \"Eden Prairie, MN 55344\", \"type\": \"Location\"},\n {\"name\": \"01-JUN-16\", \"type\": \"Date\"},\n {\"name\": \"08-JUN-16\", \"type\": \"Date\"},\n {\"name\": \"15-JUN-17\", \"type\": \"Date\"},\n {\"name\": \"USD\", \"type\": \"Currency\"},\n {\"name\": \"CC\", \"type\": \"Payment Terms\"},\n {\"name\": \"UPS Ground\", \"type\": \"Shipping Method\"},\n {\"name\": \"Delivery Number 3888472\", \"type\": \"Shipment\"},\n {\"name\": \"Serial/Lot 13406-03327\", \"type\": \"Identifier\"},\n {\"name\": \"accounts.receivable@stratasys.com\", \"type\": \"Email Address\"},\n {\"name\": \"Stratasys, Inc\", \"type\": \"Organization\"},\n {\"name\": \"28043 Network Place\", \"type\": \"Address\"},\n {\"name\": \"Chicago, IL 60673-1280\", \"type\": \"Location\"},\n {\"name\": \"JP Morgan Chase Bank\", \"type\": \"Financial Institution\"},\n {\"name\": \"CHASUS33\", \"type\": \"SWIFT Code\"},\n {\"name\": \"CHASGB2L\", \"type\": \"SWIFT Code\"},\n {\"name\": \"CC-AARON NELSON-9121\", \"type\": \"Account Reference\"},\n {\"name\": \"Internal ID 8326568\", \"type\": \"Identifier\"}\n ],\n \"edges\": [\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"issued by\", \"object\": \"Stratasys US OU\"},\n {\"subject\": \"Invoice 
1119345\", \"predicate\": \"billed to\", \"object\": \"SUNY New Paltz\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"issued on\", \"object\": \"01-JUN-16\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"references\", \"object\": \"Purchase Order 1109911\"},\n {\"subject\": \"Purchase Order 1109911\", \"predicate\": \"placed by\", \"object\": \"SUNY New Paltz\"},\n {\"subject\": \"Purchase Order 1109911\", \"predicate\": \"placed with\", \"object\": \"Stratasys US OU\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"contains line item\", \"object\": \"OBJ-03327\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has description\", \"object\": \"PACK OF 1 RGD837, VERO PUREWHITE, 3.6KG\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has unit price\", \"object\": \"USD 521.00\"},\n {\"subject\": \"OBJ-03327\", \"predicate\": \"has serial lot\", \"object\": \"Serial/Lot 13406-03327\"},\n {\"subject\": \"Serial/Lot 13406-03327\", \"predicate\": \"expires on\", \"object\": \"15-JUN-17\"},\n {\"subject\": \"SUNY New Paltz\", \"predicate\": \"located at\", \"object\": \"Smiley Arts Building 100\"},\n {\"subject\": \"Smiley Arts Building 100\", \"predicate\": \"located at\", \"object\": \"1 Hawk Dr\"},\n {\"subject\": \"1 Hawk Dr\", \"predicate\": \"located in\", \"object\": \"New Paltz, NY, 12561-2447\"},\n {\"subject\": \"New Paltz, NY, 12561-2447\", \"predicate\": \"located in\", \"object\": \"United States\"},\n {\"subject\": \"Stratasys US OU\", \"predicate\": \"located at\", \"object\": \"7665 Commerce Way\"},\n {\"subject\": \"7665 Commerce Way\", \"predicate\": \"located in\", \"object\": \"Eden Prairie, MN 55344\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"due on\", \"object\": \"08-JUN-16\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"payment terms\", \"object\": \"CC\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": \"currency\", \"object\": \"USD\"},\n {\"subject\": \"Invoice 1119345\", \"predicate\": 
\"total amount\", \"object\": \"521.00 USD\"},\n {\"subject\": \"Delivery Number 3888472\", \"predicate\": \"shipped on\", \"object\": \"01-JUN-16\"},\n {\"subject\": \"Delivery Number 3888472\", \"predicate\": \"shipped via\", \"object\": \"UPS Ground\"},\n {\"subject\": \"Delivery Number 3888472\", \"predicate\": \"associated with\", \"object\": \"Invoice 1119345\"},\n {\"subject\": \"Stratasys, Inc\", \"predicate\": \"accepts payment method",
"error": null
},
"_type": "other"
},
{
"source": "06_PPT_ALL_Design Considerations_From CAD to CAM_v3.pptx",
"size_bucket": "large",
"doc_chars_original": 21383,
"doc_chars_sent": 12000,
"truncated": true,
"condition_a": {
"input_tokens": 2951,
"output_tokens": 2424,
"latency_s": 13.83,
"metrics": {
"n_entities": 55,
"n_edges": 54,
"predicate_diversity": 33,
"type_diversity": 26,
"avg_degree": 1.96,
"largest_component": 13,
"largest_component_pct": 23.6
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"3D Printing\", \"type\": \"Technology\"},\n {\"name\": \"CAD\", \"type\": \"Software\"},\n {\"name\": \"CAM\", \"type\": \"Software\"},\n {\"name\": \"Slicing\", \"type\": \"Process\"},\n {\"name\": \"Support\", \"type\": \"Design Element\"},\n {\"name\": \"Part Density\", \"type\": \"Design Parameter\"},\n {\"name\": \"Wall Thickness\", \"type\": \"Design Parameter\"},\n {\"name\": \"Tolerance\", \"type\": \"Design Parameter\"},\n {\"name\": \"Support Removal\", \"type\": \"Process\"},\n {\"name\": \"Infill\", \"type\": \"Design Parameter\"},\n {\"name\": \"Mesh\", \"type\": \"Data Structure\"},\n {\"name\": \"STL\", \"type\": \"File Format\"},\n {\"name\": \"ASCII\", \"type\": \"File Format\"},\n {\"name\": \"Binary\", \"type\": \"File Format\"},\n {\"name\": \"FDM\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"PolyJet\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"J750\", \"type\": \"3D Printer\"},\n {\"name\": \"Texture Mapping\", \"type\": \"Design Technique\"},\n {\"name\": \"Nozzle Diameter\", \"type\": \"Machine Parameter\"},\n {\"name\": \"Droplet Size\", \"type\": \"Machine Parameter\"},\n {\"name\": \"Material Rheology\", \"type\": \"Material Property\"},\n {\"name\": \"Clearance\", \"type\": \"Design Parameter\"},\n {\"name\": \"XY Clearance\", \"type\": \"Design Parameter\"},\n {\"name\": \"Z Clearance\", \"type\": \"Design Parameter\"},\n {\"name\": \"Accuracy\", \"type\": \"Measurement Property\"},\n {\"name\": \"Precision\", \"type\": \"Measurement Property\"},\n {\"name\": \"Resolution\", \"type\": \"Machine Property\"},\n {\"name\": \"Repeatability\", \"type\": \"Machine Property\"},\n {\"name\": \"Rheology\", \"type\": \"Material Science\"},\n {\"name\": \"Photopolymer\", \"type\": \"Material\"},\n {\"name\": \"Thermoplastic\", \"type\": \"Material\"},\n {\"name\": \"Temperature Gradients\", \"type\": \"Environmental Factor\"},\n {\"name\": \"Vibrations\", \"type\": 
\"Environmental Factor\"},\n {\"name\": \"Deflections\", \"type\": \"Environmental Factor\"},\n {\"name\": \"Fortus 450 MC\", \"type\": \"3D Printer\"},\n {\"name\": \"F123\", \"type\": \"3D Printer\"},\n {\"name\": \"Stratasys PolyJet\", \"type\": \"3D Printer\"},\n {\"name\": \"Layer Thickness\", \"type\": \"Design Parameter\"},\n {\"name\": \"Minimum Wall Thickness\", \"type\": \"Design Parameter\"},\n {\"name\": \"Minimum Feature Size\", \"type\": \"Design Parameter\"},\n {\"name\": \"Breakaway Support\", \"type\": \"Support Type\"},\n {\"name\": \"Soluble Support\", \"type\": \"Support Type\"},\n {\"name\": \"Post Processing\", \"type\": \"Process\"},\n {\"name\": \"File Conversion\", \"type\": \"Process\"},\n {\"name\": \"Overhangs\", \"type\": \"Design Challenge\"},\n {\"name\": \"Closed Systems\", \"type\": \"Design Type\"},\n {\"name\": \"GrabCAD\", \"type\": \"Design Repository\"},\n {\"name\": \"Euclidean Space\", \"type\": \"Mathematical Concept\"},\n {\"name\": \"Point\", \"type\": \"Geometric Element\"},\n {\"name\": \"Triangle\", \"type\": \"Geometric Element\"},\n {\"name\": \"Curved Surfaces\", \"type\": \"Geometric Feature\"},\n {\"name\": \"End Use\", \"type\": \"Design Consideration\"},\n {\"name\": \"Optimal Materials\", \"type\": \"Design Consideration\"},\n {\"name\": \"Tools or Technology\", \"type\": \"Design Consideration\"},\n {\"name\": \"Additive Manufacturing\", \"type\": \"Manufacturing Process\"}\n ],\n \"edges\": [\n {\"subject\": \"3D Printing\", \"predicate\": \"uses\", \"object\": \"Slicing\"},\n {\"subject\": \"3D Printing\", \"predicate\": \"requires\", \"object\": \"Support\"},\n {\"subject\": \"3D Printing\", \"predicate\": \"involves\", \"object\": \"Part Density\"},\n {\"subject\": \"CAD\", \"predicate\": \"converts to\", \"object\": \"CAM\"},\n {\"subject\": \"CAD\", \"predicate\": \"exports to\", \"object\": \"STL\"},\n {\"subject\": \"STL\", \"predicate\": \"is a type of\", \"object\": \"Mesh\"},\n {\"subject\": \"STL\", 
\"predicate\": \"can be\", \"object\": \"ASCII\"},\n {\"subject\": \"STL\", \"predicate\": \"can be\", \"object\": \"Binary\"},\n {\"subject\": \"FDM\", \"predicate\": \"has design considerations for\", \"object\": \"Wall Thickness\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"has design considerations for\", \"object\": \"Wall Thickness\"},\n {\"subject\": \"J750\", \"predicate\": \"supports\", \"object\": \"Texture Mapping\"},\n {\"subject\": \"Mesh\", \"p",
"error": null
},
"condition_b": {
"local_latency_s": 129.91,
"local_metadata": {
"language": "en",
"char_length": 2834,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Document discusses design considerations for 3D printing, including slicing, support, part density, wall thickness, tolerance, clearance, infill, mesh, STL, ASCII and Binary files, and design considerations for FDM & PolyJet."
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 2834,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": true,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": false,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"technical\",\n \"one_sentence_summary\": \"Document discusses design considerations for 3D printing, including slicing, support, part density, wall thickness, tolerance, clearance, infill, mesh, STL, ASCII and Binary files, and design considerations for FDM & PolyJet.\"\n}",
"api_input_tokens": 3249,
"api_output_tokens": 4347,
"api_latency_s": 20.38,
"metrics": {
"n_entities": 118,
"n_edges": 93,
"predicate_diversity": 51,
"type_diversity": 42,
"avg_degree": 1.58,
"largest_component": 21,
"largest_component_pct": 17.8
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"3D printing\", \"type\": \"technology\"},\n {\"name\": \"slicing\", \"type\": \"process\"},\n {\"name\": \"support\", \"type\": \"component\"},\n {\"name\": \"part density\", \"type\": \"property\"},\n {\"name\": \"wall thickness\", \"type\": \"property\"},\n {\"name\": \"tolerance\", \"type\": \"property\"},\n {\"name\": \"clearance\", \"type\": \"property\"},\n {\"name\": \"infill\", \"type\": \"component\"},\n {\"name\": \"mesh\", \"type\": \"data_structure\"},\n {\"name\": \"STL\", \"type\": \"file_format\"},\n {\"name\": \"ASCII\", \"type\": \"file_format\"},\n {\"name\": \"Binary\", \"type\": \"file_format\"},\n {\"name\": \"FDM\", \"type\": \"printing_technology\"},\n {\"name\": \"PolyJet\", \"type\": \"printing_technology\"},\n {\"name\": \"CAD\", \"type\": \"software_type\"},\n {\"name\": \"CAM\", \"type\": \"software_type\"},\n {\"name\": \"nozzle diameter\", \"type\": \"property\"},\n {\"name\": \"droplet size\", \"type\": \"property\"},\n {\"name\": \"material rheology\", \"type\": \"property\"},\n {\"name\": \"minimum printable feature size\", \"type\": \"property\"},\n {\"name\": \"minimum usable feature size\", \"type\": \"property\"},\n {\"name\": \"XY clearance\", \"type\": \"property\"},\n {\"name\": \"Z clearance\", \"type\": \"property\"},\n {\"name\": \"layer thickness\", \"type\": \"property\"},\n {\"name\": \"support material\", \"type\": \"material\"},\n {\"name\": \"accuracy\", \"type\": \"property\"},\n {\"name\": \"resolution\", \"type\": \"property\"},\n {\"name\": \"repeatability\", \"type\": \"property\"},\n {\"name\": \"precision\", \"type\": \"property\"},\n {\"name\": \"temperature gradients\", \"type\": \"factor\"},\n {\"name\": \"vibrations\", \"type\": \"factor\"},\n {\"name\": \"deflections\", \"type\": \"factor\"},\n {\"name\": \"photopolymer\", \"type\": \"material\"},\n {\"name\": \"thermoplastic\", \"type\": \"material\"},\n {\"name\": \"polymerization\", 
\"type\": \"process\"},\n {\"name\": \"solidification\", \"type\": \"process\"},\n {\"name\": \"cooling\", \"type\": \"process\"},\n {\"name\": \"UV light\", \"type\": \"technology\"},\n {\"name\": \"machine specifications\", \"type\": \"documentation\"},\n {\"name\": \"Fortus 450 MC\", \"type\": \"printer_model\"},\n {\"name\": \"F123\", \"type\": \"printer_model\"},\n {\"name\": \"Stratasys PolyJet\", \"type\": \"printer_model\"},\n {\"name\": \"J750\", \"type\": \"printer_model\"},\n {\"name\": \"end use\", \"type\": \"design_consideration\"},\n {\"name\": \"post processing\", \"type\": \"process\"},\n {\"name\": \"optimal materials\", \"type\": \"design_consideration\"},\n {\"name\": \"toolpaths\", \"type\": \"output\"},\n {\"name\": \"overhangs\", \"type\": \"structure\"},\n {\"name\": \"closed systems\", \"type\": \"design_type\"},\n {\"name\": \"moving parts\", \"type\": \"component\"},\n {\"name\": \"fusing\", \"type\": \"process\"},\n {\"name\": \"axes of motion\", \"type\": \"property\"},\n {\"name\": \"mesh file\", \"type\": \"file_type\"},\n {\"name\": \"machine accuracy\", \"type\": \"property\"},\n {\"name\": \"control system\", \"type\": \"component\"},\n {\"name\": \"deviations\", \"type\": \"property\"},\n {\"name\": \"mechanical capability\", \"type\": \"property\"},\n {\"name\": \"fluid properties\", \"type\": \"property\"},\n {\"name\": \"micron\", \"type\": \"unit\"},\n {\"name\": \"measurement increments\", \"type\": \"property\"},\n {\"name\": \"nozzle\", \"type\": \"component\"},\n {\"name\": \"motion increments\", \"type\": \"property\"},\n {\"name\": \"material flow\", \"type\": \"process\"},\n {\"name\": \"non-random\", \"type\": \"property\"},\n {\"name\": \"liquids\", \"type\": \"material_state\"},\n {\"name\": \"soft solids\", \"type\": \"material_state\"},\n {\"name\": \"melted thermoplastic\", \"type\": \"material\"},\n {\"name\": \"droplet\", \"type\": \"component\"},\n {\"name\": \"shape change\", \"type\": \"process\"},\n 
{\"name\": \"commercial 3D printers\", \"type\": \"printer_category\"},\n {\"name\": \"hobbyist 3D printers\", \"type\": \"printer_category\"},\n {\"name\": \"material properties\", \"type\": \"property\"},\n {\"name\": \"part quality\", \"type\": \"property\"},\n {\"name\": \"Z-direction\", \"type\": \"axis\"},\n {\"name\": \"X direction\", \"type\": \"axis\"},\n {\"name\": \"Y direction\", \"type\": \"axis\"},\n {\"name\": \"XY plane\", \"type\": \"plane\"},\n {\"name\": \"Z height\", \"type\": \"me",
"error": null
},
"_type": "course_ppt"
},
{
"source": "2020 - DDF 220 - Intro to Computational Media Syllabus.pdf",
"size_bucket": "large",
"doc_chars_original": 18696,
"doc_chars_sent": 12000,
"truncated": true,
"condition_a": {
"input_tokens": 2857,
"output_tokens": 1824,
"latency_s": 8.63,
"metrics": {
"n_entities": 42,
"n_edges": 38,
"predicate_diversity": 20,
"type_diversity": 25,
"avg_degree": 1.81,
"largest_component": 36,
"largest_component_pct": 85.7
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Intro to Computational Media\", \"type\": \"Course\"},\n {\"name\": \"DDF 220-01\", \"type\": \"Course Code\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Instructor\"},\n {\"name\": \"Fall 2020\", \"type\": \"Semester\"},\n {\"name\": \"EIH 104\", \"type\": \"Classroom\"},\n {\"name\": \"HVAMC\", \"type\": \"Building\"},\n {\"name\": \"Processing\", \"type\": \"Programming Language\"},\n {\"name\": \"Java\", \"type\": \"Programming Language\"},\n {\"name\": \"Python\", \"type\": \"Programming Language\"},\n {\"name\": \"Grasshopper\", \"type\": \"Software\"},\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Blackboard\", \"type\": \"Learning Management System\"},\n {\"name\": \"Learning Processing\", \"type\": \"Textbook\"},\n {\"name\": \"Daniel Shiffman\", \"type\": \"Author\"},\n {\"name\": \"Morgan Kaufmann\", \"type\": \"Publisher\"},\n {\"name\": \"Algorithm Aided Design\", \"type\": \"Textbook\"},\n {\"name\": \"Arturo Tedeschi\", \"type\": \"Author\"},\n {\"name\": \"variables\", \"type\": \"Programming Concept\"},\n {\"name\": \"conditionals\", \"type\": \"Programming Concept\"},\n {\"name\": \"loops\", \"type\": \"Programming Concept\"},\n {\"name\": \"iteration\", \"type\": \"Programming Concept\"},\n {\"name\": \"image processing\", \"type\": \"Topic\"},\n {\"name\": \"interactivity\", \"type\": \"Topic\"},\n {\"name\": \"3D algorithmic modeling\", \"type\": \"Topic\"},\n {\"name\": \"Homeworks\", \"type\": \"Assignment\"},\n {\"name\": \"Mid Semester Project\", \"type\": \"Assignment\"},\n {\"name\": \"Final Project\", \"type\": \"Assignment\"},\n {\"name\": \"Participation\", \"type\": \"Grading Component\"},\n {\"name\": \"October 30th\", \"type\": \"Date\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Student Union\", \"type\": \"Location\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n 
{\"name\": \"Title IX Office\", \"type\": \"Institution\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Library\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"University\"},\n {\"name\": \"electronic arts\", \"type\": \"Discipline\"},\n {\"name\": \"visual design\", \"type\": \"Discipline\"},\n {\"name\": \"new media art\", \"type\": \"Discipline\"},\n {\"name\": \"2D Pixel Processing\", \"type\": \"Project Type\"},\n {\"name\": \"3D design\", \"type\": \"Design Type\"},\n {\"name\": \"generative works\", \"type\": \"Art Form\"},\n {\"name\": \"interactive works\", \"type\": \"Art Form\"}\n ],\n \"edges\": [\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"has course code\", \"object\": \"DDF 220-01\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"is taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"is offered in\", \"object\": \"Fall 2020\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"meets in\", \"object\": \"EIH 104\"},\n {\"subject\": \"EIH 104\", \"predicate\": \"is located in\", \"object\": \"HVAMC\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has office hours\", \"object\": \"M-R 10am \u2013 12pm\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"teaches\", \"object\": \"Processing\"},\n {\"subject\": \"Processing\", \"predicate\": \"is built on\", \"object\": \"Java\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"explores\", \"object\": \"Python\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"explores\", \"object\": \"Grasshopper\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"teaches\", \"object\": \"Rhino\"},\n {\"subject\": \"Processing\", \"predicate\": \"is used for\", \"object\": \"electronic arts\"},\n {\"subject\": \"Processing\", \"predicate\": \"is used for\", \"object\": \"visual design\"},\n {\"subject\": 
\"Processing\", \"predicate\": \"is used for\", \"object\": \"new media art\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"covers\", \"object\": \"variables\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"covers\", \"object\": \"conditionals\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"covers\", \"object\": \"loops\"},\n {\"subject\": \"Intro to Computational Media\", \"predicate\": \"cover",
"error": null
},
"condition_b": {
"local_latency_s": 117.37,
"local_metadata": {
"language": "en",
"char_length": 3246,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Introduction to Computational Media course for programming in digital arts with Processing and Rhino"
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 3246,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": true,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": false\n },\n \"content_signals\": {\n \"has_named_people\": true,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"educational\",\n \"one_sentence_summary\": \"Introduction to Computational Media course for programming in digital arts with Processing and Rhino\"\n}",
"api_input_tokens": 3119,
"api_output_tokens": 3226,
"api_latency_s": 15.53,
"metrics": {
"n_entities": 93,
"n_edges": 51,
"predicate_diversity": 30,
"type_diversity": 24,
"avg_degree": 1.1,
"largest_component": 31,
"largest_component_pct": 33.3
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Intro to Computational Media\", \"type\": \"Course\"},\n {\"name\": \"DDF 220-01\", \"type\": \"Course Code\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Person\"},\n {\"name\": \"EIH 104\", \"type\": \"Location\"},\n {\"name\": \"HVAMC\", \"type\": \"Location\"},\n {\"name\": \"Processing\", \"type\": \"Programming Language\"},\n {\"name\": \"Java\", \"type\": \"Programming Language\"},\n {\"name\": \"Python\", \"type\": \"Programming Language\"},\n {\"name\": \"Grasshopper\", \"type\": \"Software\"},\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Blackboard\", \"type\": \"Learning Management System\"},\n {\"name\": \"Learning Processing\", \"type\": \"Textbook\"},\n {\"name\": \"Daniel Shiffman\", \"type\": \"Person\"},\n {\"name\": \"Morgan Kaufmann\", \"type\": \"Publisher\"},\n {\"name\": \"Algorithm Aided Design\", \"type\": \"Textbook\"},\n {\"name\": \"Arturo Tedeschi\", \"type\": \"Person\"},\n {\"name\": \"variables\", \"type\": \"Programming Concept\"},\n {\"name\": \"conditionals\", \"type\": \"Programming Concept\"},\n {\"name\": \"loops\", \"type\": \"Programming Concept\"},\n {\"name\": \"iteration\", \"type\": \"Programming Concept\"},\n {\"name\": \"image processing\", \"type\": \"Topic\"},\n {\"name\": \"interactivity\", \"type\": \"Topic\"},\n {\"name\": \"3D algorithmic modeling\", \"type\": \"Topic\"},\n {\"name\": \"digital arts\", \"type\": \"Field\"},\n {\"name\": \"visual design\", \"type\": \"Field\"},\n {\"name\": \"new media art\", \"type\": \"Field\"},\n {\"name\": \"electronic arts\", \"type\": \"Field\"},\n {\"name\": \"Homeworks\", \"type\": \"Assignment\"},\n {\"name\": \"Mid Semester Project and Presentation\", \"type\": \"Assignment\"},\n {\"name\": \"Final Project\", \"type\": \"Assignment\"},\n {\"name\": \"Participation\", \"type\": \"Grading Component\"},\n {\"name\": \"2D Pixel Processing Program\", \"type\": \"Assignment\"},\n {\"name\": 
\"Printed Layout\", \"type\": \"Assignment\"},\n {\"name\": \"Boolean Quiz\", \"type\": \"Assignment\"},\n {\"name\": \"Simple UI Looping\", \"type\": \"Assignment\"},\n {\"name\": \"2D Attractor System\", \"type\": \"Assignment\"},\n {\"name\": \"Sun, Moon, and Earth\", \"type\": \"Assignment\"},\n {\"name\": \"Functions Distance Function\", \"type\": \"Assignment\"},\n {\"name\": \"5 Images\", \"type\": \"Assignment\"},\n {\"name\": \"Image Project\", \"type\": \"Assignment\"},\n {\"name\": \"3D Grid\", \"type\": \"Assignment\"},\n {\"name\": \"3D Attractor System\", \"type\": \"Assignment\"},\n {\"name\": \"Curve Diagrid\", \"type\": \"Assignment\"},\n {\"name\": \"Laser Tests\", \"type\": \"Assignment\"},\n {\"name\": \"Laser Folding Patterns\", \"type\": \"Assignment\"},\n {\"name\": \"3D Image Patterns\", \"type\": \"Assignment\"},\n {\"name\": \"Vase Definitions\", \"type\": \"Assignment\"},\n {\"name\": \"Surface Skinning\", \"type\": \"Assignment\"},\n {\"name\": \"Voronoi and Mesh Subdivision\", \"type\": \"Assignment\"},\n {\"name\": \"Fall 2020\", \"type\": \"Semester\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"Jean Vizvary\", \"type\": \"Person\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n {\"name\": \"Title IX Office\", \"type\": \"Institution\"},\n {\"name\": \"Office of Human Resources, Diversity & Inclusion\", \"type\": \"Institution\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Institution\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"Assessment\"},\n {\"name\": \"syntax errors\", \"type\": \"Programming Concept\"},\n {\"name\": \"logic errors\", \"type\": \"Programming Concept\"},\n {\"name\": \"expressions\", \"type\": \"Programming Concept\"},\n {\"name\": \"control structure\", \"type\": \"Programming 
Concept\"},\n {\"name\": \"algorithms\", \"type\": \"Programming Concept\"},\n {\"name\": \"generative works\", \"type\": \"Art Form\"},\n {\"name\": \"interactive works\", \"type\": \"Art Form\"},\n {\"name\": \"3D design\", \"type\": \"Field\"},\n {\"name\": \"Parametric Representations of Curves\", \"type\": \"Topic\"},\n {\"name\": \"Parametric Representation of Surfaces\", \"type\": \"Topic\"},\n {\"name\": \"Curvature and Developable Surfa",
"error": null
},
"_type": "syllabus"
},
{
"source": "Annual Report - 2020.pdf",
"size_bucket": "large",
"doc_chars_original": 10110,
"doc_chars_sent": 10110,
"truncated": false,
"condition_a": {
"input_tokens": 2725,
"output_tokens": 2405,
"latency_s": 14.14,
"metrics": {
"n_entities": 44,
"n_edges": 54,
"predicate_diversity": 25,
"type_diversity": 23,
"avg_degree": 2.45,
"largest_component": 43,
"largest_component_pct": 97.7
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Aaron Nelson\", \"type\": \"Faculty Member\"},\n {\"name\": \"Assistant Professor\", \"type\": \"Academic Rank\"},\n {\"name\": \"Fine and Performing Arts\", \"type\": \"School/College\"},\n {\"name\": \"Art\", \"type\": \"Department\"},\n {\"name\": \"Making Things Move\", \"type\": \"Course\"},\n {\"name\": \"DDF 310-01\", \"type\": \"Course Code\"},\n {\"name\": \"Design Intents\", \"type\": \"Course\"},\n {\"name\": \"DDF 320-01\", \"type\": \"Course Code\"},\n {\"name\": \"Introduction to Computational Design\", \"type\": \"Course\"},\n {\"name\": \"DDF 220-01\", \"type\": \"Course Code\"},\n {\"name\": \"Material Studies\", \"type\": \"Course\"},\n {\"name\": \"DDF 305-01\", \"type\": \"Course Code\"},\n {\"name\": \"COVID-19 pandemic\", \"type\": \"Event\"},\n {\"name\": \"DDF design curriculum\", \"type\": \"Curriculum\"},\n {\"name\": \"Spring 2020\", \"type\": \"Term\"},\n {\"name\": \"Fall 2020\", \"type\": \"Term\"},\n {\"name\": \"Eisgruber\", \"type\": \"Student\"},\n {\"name\": \"Rabadi\", \"type\": \"Student\"},\n {\"name\": \"Coffin\", \"type\": \"Student\"},\n {\"name\": \"Henix\", \"type\": \"Student\"},\n {\"name\": \"Raber\", \"type\": \"Student\"},\n {\"name\": \"Wong\", \"type\": \"Student\"},\n {\"name\": \"Open Source Powder Printer\", \"type\": \"Student Project\"},\n {\"name\": \"Reale\", \"type\": \"Student\"},\n {\"name\": \"Scribani\", \"type\": \"Student\"},\n {\"name\": \"Rubin\", \"type\": \"Student\"},\n {\"name\": \"Orlando\", \"type\": \"Student\"},\n {\"name\": \"3D Print Dispenser\", \"type\": \"Student Project\"},\n {\"name\": \"College Art Association 2020 Annual Conference\", \"type\": \"Conference\"},\n {\"name\": \"Digital and Haptic: Merging New and Old Technology\", \"type\": \"Presentation\"},\n {\"name\": \"NoVo Foundation\", \"type\": \"Granting Agency\"},\n {\"name\": \"Scoping the Makerspace at the Metro Center\", \"type\": \"Grant Project\"},\n 
{\"name\": \"IBM\", \"type\": \"Granting Agency\"},\n {\"name\": \"Applications of Injection Molding Using Polymer Molds and Designing for Additive Manufacturing\", \"type\": \"Grant Project\"},\n {\"name\": \"Phase 2 of IBM/New Paltz Additive Manufacturing Collaboration\", \"type\": \"Grant Project\"},\n {\"name\": \"Art Departmental Committee\", \"type\": \"Committee\"},\n {\"name\": \"HVAMC\", \"type\": \"Organization\"},\n {\"name\": \"Face shields design\", \"type\": \"Community Project\"},\n {\"name\": \"College Art Association\", \"type\": \"Professional Membership\"},\n {\"name\": \"Additive Manufacturing and COVID 19\", \"type\": \"Media Appearance\"},\n {\"name\": \"Academic Minute WAMC NPR\", \"type\": \"Media Outlet\"},\n {\"name\": \"DDF Graduate program\", \"type\": \"Academic Program\"},\n {\"name\": \"Tenure\", \"type\": \"Academic Status\"},\n {\"name\": \"Spring 2021\", \"type\": \"Term\"}\n ],\n \"edges\": [\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"holds rank of\", \"object\": \"Assistant Professor\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"works in\", \"object\": \"Fine and Performing Arts\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"works in\", \"object\": \"Art\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Making Things Move\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Design Intents\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Introduction to Computational Design\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Material Studies\"},\n {\"subject\": \"Making Things Move\", \"predicate\": \"has course code\", \"object\": \"DDF 310-01\"},\n {\"subject\": \"Design Intents\", \"predicate\": \"has course code\", \"object\": \"DDF 320-01\"},\n {\"subject\": \"Introduction to Computational Design\", \"predicate\": \"has course code\", \"object\": \"DDF 220-01\"},\n {\"subject\": \"Material 
Studies\", \"predicate\": \"has course code\", \"object\": \"DDF 305-01\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"led curriculum transition in response to\", \"object\": \"COVID-19 pandemic\"},\n {\"subject\": \"DDF design curriculum\", \"predicate\": \"moved to digital delivery format in\", \"object\": \"Spring 2020\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"redesigned course for online delivery\", \"object",
"error": null
},
"condition_b": {
"local_latency_s": 117.65,
"local_metadata": {
"language": "en",
"char_length": 1347,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "A faculty annual report detailing Aaron Nelson's academic and creative activities during the reporting period of January 1, 2020 \u2013 December 31, 2020."
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 1347,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": true,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": true\n },\n \"content_signals\": {\n \"has_named_people\": true,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": true,\n \"has_quotations\": false\n },\n \"domain_class\": \"educational\",\n \"one_sentence_summary\": \"A faculty annual report detailing Aaron Nelson's academic and creative activities during the reporting period of January 1, 2020 \u2013 December 31, 2020.\"\n}",
"api_input_tokens": 3004,
"api_output_tokens": 3093,
"api_latency_s": 14.47,
"metrics": {
"n_entities": 58,
"n_edges": 66,
"predicate_diversity": 35,
"type_diversity": 23,
"avg_degree": 2.28,
"largest_component": 43,
"largest_component_pct": 74.1
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Aaron Nelson\", \"type\": \"Person\"},\n {\"name\": \"Assistant Professor\", \"type\": \"Academic Rank\"},\n {\"name\": \"Fine and Performing Arts\", \"type\": \"School/College\"},\n {\"name\": \"Art\", \"type\": \"Department\"},\n {\"name\": \"Making Things Move\", \"type\": \"Course\"},\n {\"name\": \"DDF 310-01\", \"type\": \"Course Code\"},\n {\"name\": \"Design Intents\", \"type\": \"Course\"},\n {\"name\": \"DDF 320-01\", \"type\": \"Course Code\"},\n {\"name\": \"Introduction to Computational Design\", \"type\": \"Course\"},\n {\"name\": \"DDF 220-01\", \"type\": \"Course Code\"},\n {\"name\": \"Material Studies\", \"type\": \"Course\"},\n {\"name\": \"DDF305-01\", \"type\": \"Course Code\"},\n {\"name\": \"Spring 2020\", \"type\": \"Time Period\"},\n {\"name\": \"Summer 2020\", \"type\": \"Time Period\"},\n {\"name\": \"Fall 2020\", \"type\": \"Time Period\"},\n {\"name\": \"Winter 2020-21\", \"type\": \"Time Period\"},\n {\"name\": \"COVID-19 pandemic\", \"type\": \"Event\"},\n {\"name\": \"DDF design curriculum\", \"type\": \"Curriculum\"},\n {\"name\": \"Eisgruber\", \"type\": \"Person\"},\n {\"name\": \"Rabadi\", \"type\": \"Person\"},\n {\"name\": \"Coffin\", \"type\": \"Person\"},\n {\"name\": \"Henix\", \"type\": \"Person\"},\n {\"name\": \"Raber\", \"type\": \"Person\"},\n {\"name\": \"Wong\", \"type\": \"Person\"},\n {\"name\": \"Open Source Powder Printer\", \"type\": \"Student Work\"},\n {\"name\": \"Reale\", \"type\": \"Person\"},\n {\"name\": \"Scribani\", \"type\": \"Person\"},\n {\"name\": \"Rubin\", \"type\": \"Person\"},\n {\"name\": \"Orlando\", \"type\": \"Person\"},\n {\"name\": \"3D Print Dispenser\", \"type\": \"Student Work\"},\n {\"name\": \"College Art Association 2020 Annual Conference\", \"type\": \"Conference\"},\n {\"name\": \"Digital and Haptic: Merging New and Old Technology\", \"type\": \"Presentation\"},\n {\"name\": \"NoVo Foundation\", \"type\": \"Granting 
Agency\"},\n {\"name\": \"Scoping the Makerspace at the Metro Center\", \"type\": \"Grant Project\"},\n {\"name\": \"IBM\", \"type\": \"Granting Agency\"},\n {\"name\": \"Applications of Injection Molding Using Polymer Molds and Designing for Additive Manufacturing\", \"type\": \"Grant Project\"},\n {\"name\": \"Phase 2 of IBM/New Paltz Additive Manufacturing Collaboration\", \"type\": \"Grant Project\"},\n {\"name\": \"Art Departmental Committee\", \"type\": \"Committee\"},\n {\"name\": \"Diversity, Equity, and Inclusion Art Departmental Committee\", \"type\": \"Committee\"},\n {\"name\": \"HVAMC\", \"type\": \"Organization\"},\n {\"name\": \"face shields\", \"type\": \"Product\"},\n {\"name\": \"College Art Association\", \"type\": \"Professional Organization\"},\n {\"name\": \"Academic Minute WAMC NPR\", \"type\": \"Media Outlet\"},\n {\"name\": \"Additive Manufacturing and COVID 19\", \"type\": \"Media Appearance\"},\n {\"name\": \"DDF Graduate program\", \"type\": \"Academic Program\"},\n {\"name\": \"January 1, 2020\", \"type\": \"Date\"},\n {\"name\": \"December 31, 2020\", \"type\": \"Date\"},\n {\"name\": \"September 1, 2020\", \"type\": \"Date\"},\n {\"name\": \"January 31, 2021\", \"type\": \"Date\"},\n {\"name\": \"February 1, 2021\", \"type\": \"Date\"},\n {\"name\": \"August 31, 2021\", \"type\": \"Date\"},\n {\"name\": \"August 15, 2020\", \"type\": \"Date\"},\n {\"name\": \"March 14, 2021\", \"type\": \"Date\"},\n {\"name\": \"October 19, 2020\", \"type\": \"Date\"},\n {\"name\": \"April 18, 2021\", \"type\": \"Date\"},\n {\"name\": \"Fall 2022\", \"type\": \"Time Period\"},\n {\"name\": \"Spring\", \"type\": \"Time Period\"},\n {\"name\": \"tenure\", \"type\": \"Academic Status\"}\n ],\n \"edges\": [\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"holds rank of\", \"object\": \"Assistant Professor\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"works in\", \"object\": \"Fine and Performing Arts\"},\n {\"subject\": \"Aaron Nelson\", 
\"predicate\": \"works in\", \"object\": \"Art\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Making Things Move\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Design Intents\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Introduction to Computational Design\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"taught\", \"object\": \"Material Studies\"},",
"error": null
},
"_type": "faculty_report"
},
{
"source": "Claude: Preparing for dinner with Jim Agutter",
"size_bucket": "large",
"doc_chars_original": 10053,
"doc_chars_sent": 10053,
"truncated": false,
"condition_a": {
"input_tokens": 2269,
"output_tokens": 2516,
"latency_s": 14.25,
"metrics": {
"n_entities": 58,
"n_edges": 51,
"predicate_diversity": 28,
"type_diversity": 24,
"avg_degree": 1.76,
"largest_component": 36,
"largest_component_pct": 62.1
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Jim Agutter\", \"type\": \"Person\"},\n {\"name\": \"Division of Multi-Disciplinary Design (MDD)\", \"type\": \"Academic Program\"},\n {\"name\": \"University of Utah\", \"type\": \"Institution\"},\n {\"name\": \"HVAMC\", \"type\": \"Laboratory/Facility\"},\n {\"name\": \"Information visualization\", \"type\": \"Research Field\"},\n {\"name\": \"Design\", \"type\": \"Research Field\"},\n {\"name\": \"Healthcare data visualization\", \"type\": \"Research Field\"},\n {\"name\": \"Complex systems display\", \"type\": \"Research Field\"},\n {\"name\": \"Computational design\", \"type\": \"Research Field\"},\n {\"name\": \"Innovation Lab\", \"type\": \"Laboratory\"},\n {\"name\": \"College of Architecture + Planning\", \"type\": \"Academic Department\"},\n {\"name\": \"Spark Design Initiative\", \"type\": \"Program\"},\n {\"name\": \"Bachelor of University Studies program\", \"type\": \"Academic Program\"},\n {\"name\": \"Office of Undergraduate Studies\", \"type\": \"Academic Department\"},\n {\"name\": \"Frank Drews\", \"type\": \"Person\"},\n {\"name\": \"Dave Strayer\", \"type\": \"Person\"},\n {\"name\": \"Applied Medical Visualizations (Medvis)\", \"type\": \"Company\"},\n {\"name\": \"GE Healthcare\", \"type\": \"Company\"},\n {\"name\": \"ICU nurse situation awareness\", \"type\": \"Research Topic\"},\n {\"name\": \"Medication management\", \"type\": \"Research Topic\"},\n {\"name\": \"Arterial blood gas data visualization\", \"type\": \"Research Topic\"},\n {\"name\": \"UGuide\", \"type\": \"Software/Tool\"},\n {\"name\": \"AI Community of Practice\", \"type\": \"Organization\"},\n {\"name\": \"Connect2Health\", \"type\": \"Program\"},\n {\"name\": \"Honors Praxis Lab\", \"type\": \"Laboratory\"},\n {\"name\": \"Hope in a Time of Dying\", \"type\": \"Novel\"},\n {\"name\": \"LGBTQ+ fiction\", \"type\": \"Genre\"},\n {\"name\": \"Creative Achievement Award\", \"type\": \"Award\"},\n {\"name\": 
\"ACSA\", \"type\": \"Organization\"},\n {\"name\": \"University Honors Professorship\", \"type\": \"Award\"},\n {\"name\": \"Early Career Teaching Award\", \"type\": \"Award\"},\n {\"name\": \"Beacon of Excellence Award\", \"type\": \"Award\"},\n {\"name\": \"Distinguished Innovation and Impact Award\", \"type\": \"Award\"},\n {\"name\": \"IBM\", \"type\": \"Company\"},\n {\"name\": \"Braskem\", \"type\": \"Company\"},\n {\"name\": \"Selux\", \"type\": \"Company\"},\n {\"name\": \"Stable Diffusion\", \"type\": \"Technology\"},\n {\"name\": \"Kingston lunette\", \"type\": \"Project\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Grasshopper\", \"type\": \"Software\"},\n {\"name\": \"Python\", \"type\": \"Programming Language\"},\n {\"name\": \"School of Computing\", \"type\": \"Academic Department\"},\n {\"name\": \"School of Medicine\", \"type\": \"Academic Department\"},\n {\"name\": \"College of Engineering\", \"type\": \"Academic Department\"},\n {\"name\": \"FDM\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"PolyJet\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"SLA\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"DMLS\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"CNC\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Additive manufacturing\", \"type\": \"Manufacturing Field\"},\n {\"name\": \"Heritage preservation\", \"type\": \"Field\"},\n {\"name\": \"Medical device prototyping\", \"type\": \"Research Area\"},\n {\"name\": \"Anatomical models\", \"type\": \"Research Area\"},\n {\"name\": \"Materials science\", \"type\": \"Research Field\"},\n {\"name\": \"Mechanical engineering\", \"type\": \"Research Field\"},\n {\"name\": \"IDSA\", \"type\": \"Organization\"},\n {\"name\": \"Empathic design\", \"type\": \"Design Approach\"},\n {\"name\": \"Chocolate casting project\", \"type\": \"Project\"}\n ],\n \"edges\": [\n {\"subject\": \"Jim Agutter\", \"predicate\": \"is founder of\", 
\"object\": \"Division of Multi-Disciplinary Design (MDD)\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"chairs search committee for\", \"object\": \"MDD\"},\n {\"subject\": \"Division of Multi-Disciplinary Design (MDD)\", \"predicate\": \"is located at\", \"object\": \"University of Utah\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"holds position of\", \"object\": \"Senior Associate Dean of Faculty Success ",
"error": null
},
"condition_b": {
"local_latency_s": 94.7,
"local_metadata": {
"language": "en",
"char_length": 3206,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Discussion between two individuals preparing for a dinner meeting, discussing the background and work of Jim Agutter, a university administrator and designer."
},
"local_raw": "{\n \"language\": \"en\",\n \"char_length\": 3206,\n \"primary_format\": \"prose\",\n \"structural_signals\": {\n \"has_headings\": true,\n \"has_bullet_lists\": false,\n \"has_numbered_lists\": false,\n \"has_tables\": false,\n \"has_code_blocks\": false,\n \"has_dates\": true\n },\n \"content_signals\": {\n \"has_named_people\": true,\n \"has_institutional_language\": true,\n \"has_technical_terminology\": true,\n \"has_first_person\": false,\n \"has_quotations\": false\n },\n \"domain_class\": \"educational\",\n \"one_sentence_summary\": \"Discussion between two individuals preparing for a dinner meeting, discussing the background and work of Jim Agutter, a university administrator and designer.\"\n}",
"api_input_tokens": 2540,
"api_output_tokens": 2395,
"api_latency_s": 12.96,
"metrics": {
"n_entities": 41,
"n_edges": 57,
"predicate_diversity": 35,
"type_diversity": 7,
"avg_degree": 2.78,
"largest_component": 26,
"largest_component_pct": 63.4
},
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Jim Agutter\", \"type\": \"person\"},\n {\"name\": \"Division of Multi-Disciplinary Design (MDD)\", \"type\": \"organization\"},\n {\"name\": \"University of Utah\", \"type\": \"organization\"},\n {\"name\": \"Office of Undergraduate Studies\", \"type\": \"organization\"},\n {\"name\": \"Innovation Lab\", \"type\": \"organization\"},\n {\"name\": \"College of Architecture + Planning\", \"type\": \"organization\"},\n {\"name\": \"Spark Design Initiative\", \"type\": \"organization\"},\n {\"name\": \"Bachelor of University Studies program\", \"type\": \"program\"},\n {\"name\": \"Frank Drews\", \"type\": \"person\"},\n {\"name\": \"Dave Strayer\", \"type\": \"person\"},\n {\"name\": \"Applied Medical Visualizations (Medvis)\", \"type\": \"organization\"},\n {\"name\": \"GE Healthcare\", \"type\": \"organization\"},\n {\"name\": \"IBM\", \"type\": \"organization\"},\n {\"name\": \"Braskem\", \"type\": \"organization\"},\n {\"name\": \"Selux\", \"type\": \"organization\"},\n {\"name\": \"UGuide\", \"type\": \"project\"},\n {\"name\": \"AI Community of Practice\", \"type\": \"organization\"},\n {\"name\": \"Connect2Health\", \"type\": \"project\"},\n {\"name\": \"Honors Praxis Lab\", \"type\": \"organization\"},\n {\"name\": \"Hope in a Time of Dying\", \"type\": \"work\"},\n {\"name\": \"ACSA\", \"type\": \"organization\"},\n {\"name\": \"HVAMC\", \"type\": \"organization\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"organization\"},\n {\"name\": \"Kingston lunette\", \"type\": \"project\"},\n {\"name\": \"Stable Diffusion\", \"type\": \"technology\"},\n {\"name\": \"IDSA\", \"type\": \"organization\"},\n {\"name\": \"School of Computing\", \"type\": \"organization\"},\n {\"name\": \"School of Medicine\", \"type\": \"organization\"},\n {\"name\": \"College of Engineering\", \"type\": \"organization\"},\n {\"name\": \"Grasshopper\", \"type\": \"technology\"},\n {\"name\": \"Python\", \"type\": 
\"technology\"},\n {\"name\": \"FDM\", \"type\": \"technology\"},\n {\"name\": \"PolyJet\", \"type\": \"technology\"},\n {\"name\": \"SLA\", \"type\": \"technology\"},\n {\"name\": \"DMLS\", \"type\": \"technology\"},\n {\"name\": \"CNC\", \"type\": \"technology\"},\n {\"name\": \"Creative Achievement Award\", \"type\": \"award\"},\n {\"name\": \"University Honors Professorship\", \"type\": \"award\"},\n {\"name\": \"Early Career Teaching Award\", \"type\": \"award\"},\n {\"name\": \"Beacon of Excellence Award\", \"type\": \"award\"},\n {\"name\": \"Distinguished Innovation and Impact Award\", \"type\": \"award\"}\n ],\n \"edges\": [\n {\"subject\": \"Jim Agutter\", \"predicate\": \"is founder of\", \"object\": \"Division of Multi-Disciplinary Design (MDD)\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"chairs search committee for\", \"object\": \"Division of Multi-Disciplinary Design (MDD)\"},\n {\"subject\": \"Division of Multi-Disciplinary Design (MDD)\", \"predicate\": \"is located at\", \"object\": \"University of Utah\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"holds title of\", \"object\": \"Senior Associate Dean of Faculty Success & Academic Innovation\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"works in\", \"object\": \"Office of Undergraduate Studies\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"is Lab Director of\", \"object\": \"Innovation Lab\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"holds appointment in\", \"object\": \"College of Architecture + Planning\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"directs\", \"object\": \"Spark Design Initiative\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"directs\", \"object\": \"Bachelor of University Studies program\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"is past director of\", \"object\": \"Division of Multi-Disciplinary Design (MDD)\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"researches\", \"object\": \"medical 
visualization\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"researches\", \"object\": \"ICU nurse situation awareness\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"researches\", \"object\": \"integrated information displays for medication management\"},\n {\"subject\": \"Jim Agutter\", \"predicate\": \"researches\", \"object\": \"graphical visualization tools for arterial blood gas data\"},\n {\"subject\": \"Jim Agutte",
"error": null
},
"_type": "conversational"
}
]
}
+482
View File
@@ -0,0 +1,482 @@
{
"meta": {
"docs": 10,
"seed": 42,
"forced_sources": [
"2019-2020 Research and Creative Projects Awards Guidelines.FINAL.pdf",
"2021 - DDF 320 - Design Intents Syllabus.pdf"
]
},
"pairs": [
{
"source": "03_Editing Geometry.docx",
"size_bucket": "small",
"doc_chars": 171,
"metrics_a": {
"n_entities": 7,
"n_edges": 8,
"predicate_diversity": 4,
"type_diversity": 5,
"avg_degree": 2.29,
"largest_component": 7,
"largest_component_pct": 100.0
},
"metrics_b": {
"n_entities": 8,
"n_edges": 10,
"predicate_diversity": 6,
"type_diversity": 4,
"avg_degree": 2.5,
"largest_component": 8,
"largest_component_pct": 100.0
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Editing Commands\", \"type\": \"Feature\"},\n {\"name\": \"geometry\", \"type\": \"Object\"},\n {\"name\": \"sections\", \"type\": \"Component\"},\n {\"name\": \"form\", \"type\": \"Structure\"},\n {\"name\": \"adding\", \"type\": \"Operation\"},\n {\"name\": \"combining\", \"type\": \"Operation\"},\n {\"name\": \"removing\", \"type\": \"Operation\"}\n ],\n \"edges\": [\n {\"subject\": \"Editing Commands\", \"predicate\": \"can edit\", \"object\": \"geometry\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"adding\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"combining\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"removing\"},\n {\"subject\": \"adding\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"combining\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"removing\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"sections\", \"predicate\": \"are part of\", \"object\": \"form\"}\n ]\n}\n```",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Editing Commands\", \"type\": \"concept\"},\n {\"name\": \"geometry\", \"type\": \"technical_object\"},\n {\"name\": \"form\", \"type\": \"technical_object\"},\n {\"name\": \"sections\", \"type\": \"technical_component\"},\n {\"name\": \"adding\", \"type\": \"operation\"},\n {\"name\": \"combining\", \"type\": \"operation\"},\n {\"name\": \"removing\", \"type\": \"operation\"},\n {\"name\": \"methods\", \"type\": \"concept\"}\n ],\n \"edges\": [\n {\"subject\": \"geometry\", \"predicate\": \"can be edited through\", \"object\": \"Editing Commands\"},\n {\"subject\": \"geometry\", \"predicate\": \"is part of\", \"object\": \"form\"},\n {\"subject\": \"adding\", \"predicate\": \"is a method for editing\", \"object\": \"geometry\"},\n {\"subject\": \"combining\", \"predicate\": \"is a method for editing\", \"object\": \"geometry\"},\n {\"subject\": \"removing\", \"predicate\": \"is a method for editing\", \"object\": \"geometry\"},\n {\"subject\": \"adding\", \"predicate\": \"operates on\", \"object\": \"sections\"},\n {\"subject\": \"combining\", \"predicate\": \"operates on\", \"object\": \"sections\"},\n {\"subject\": \"removing\", \"predicate\": \"operates on\", \"object\": \"sections\"},\n {\"subject\": \"sections\", \"predicate\": \"are part of\", \"object\": \"form\"},\n {\"subject\": \"methods\", \"predicate\": \"enable editing of\", \"object\": \"geometry\"}\n ]\n}\n```",
"metadata_b": {
"language": "en",
"char_length": 208,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "This document outlines methods for editing geometry in a form."
}
},
{
"source": "10_Moving Parts.docx",
"size_bucket": "medium",
"doc_chars": 2847,
"metrics_a": {
"n_entities": 27,
"n_edges": 23,
"predicate_diversity": 14,
"type_diversity": 8,
"avg_degree": 1.7,
"largest_component": 11,
"largest_component_pct": 40.7
},
"metrics_b": {
"n_entities": 28,
"n_edges": 32,
"predicate_diversity": 13,
"type_diversity": 10,
"avg_degree": 2.29,
"largest_component": 21,
"largest_component_pct": 75.0
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Hinge\", \"type\": \"Mechanism\"},\n {\"name\": \"Horizontal Hinge\", \"type\": \"Mechanism\"},\n {\"name\": \"Pin\", \"type\": \"Component\"},\n {\"name\": \"Knuckles\", \"type\": \"Component\"},\n {\"name\": \"Central axis\", \"type\": \"Component\"},\n {\"name\": \"Overhangs\", \"type\": \"Feature\"},\n {\"name\": \"Vertical Pin/Hinge\", \"type\": \"Mechanism\"},\n {\"name\": \"PLA\", \"type\": \"Material\"},\n {\"name\": \"Build platform\", \"type\": \"Equipment\"},\n {\"name\": \"Ball in Socket\", \"type\": \"Mechanism\"},\n {\"name\": \"Ball\", \"type\": \"Component\"},\n {\"name\": \"Socket\", \"type\": \"Component\"},\n {\"name\": \"Bridges\", \"type\": \"Feature\"},\n {\"name\": \"Gyroscopic effect\", \"type\": \"Property\"},\n {\"name\": \"Screw\", \"type\": \"Mechanism\"},\n {\"name\": \"Ridge\", \"type\": \"Component\"},\n {\"name\": \"Groove\", \"type\": \"Component\"},\n {\"name\": \"Helix\", \"type\": \"Shape\"},\n {\"name\": \"Chain links\", \"type\": \"Mechanism\"},\n {\"name\": \"Fabric\", \"type\": \"Mechanism\"},\n {\"name\": \"Tessellation\", \"type\": \"Property\"},\n {\"name\": \"Mesh\", \"type\": \"Structure\"},\n {\"name\": \"Nesting\", \"type\": \"Mechanism\"},\n {\"name\": \"Telescoping\", \"type\": \"Mechanism\"},\n {\"name\": \"Straight Telescope\", \"type\": \"Mechanism\"},\n {\"name\": \"Wedging Telescope\", \"type\": \"Mechanism\"},\n {\"name\": \"Guide pin\", \"type\": \"Component\"}\n ],\n \"edges\": [\n {\"subject\": \"Hinge\", \"predicate\": \"allows for\", \"object\": \"Rotation\"},\n {\"subject\": \"Hinge\", \"predicate\": \"comprises\", \"object\": \"Pin\"},\n {\"subject\": \"Hinge\", \"predicate\": \"comprises\", \"object\": \"Central axis\"},\n {\"subject\": \"Hinge\", \"predicate\": \"comprises\", \"object\": \"Knuckles\"},\n {\"subject\": \"Horizontal Hinge\", \"predicate\": \"is a type of\", \"object\": \"Hinge\"},\n {\"subject\": \"Horizontal Hinge\", 
\"predicate\": \"takes advantage of\", \"object\": \"Overhangs\"},\n {\"subject\": \"Vertical Pin/Hinge\", \"predicate\": \"can be printed in\", \"object\": \"PLA\"},\n {\"subject\": \"Vertical Pin/Hinge\", \"predicate\": \"can be printed without\", \"object\": \"Supports\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"takes advantage of\", \"object\": \"Overhangs\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"takes advantage of\", \"object\": \"Bridges\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"comprises\", \"object\": \"Ball\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"comprises\", \"object\": \"Socket\"},\n {\"subject\": \"Ball\", \"predicate\": \"rolls inside\", \"object\": \"Socket\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"can produce\", \"object\": \"Gyroscopic effect\"},\n {\"subject\": \"Screw\", \"predicate\": \"comprises\", \"object\": \"Ridge\"},\n {\"subject\": \"Screw\", \"predicate\": \"comprises\", \"object\": \"Groove\"},\n {\"subject\": \"Ridge\", \"predicate\": \"fits in\", \"object\": \"Groove\"},\n {\"subject\": \"Groove\", \"predicate\": \"travels helically along\", \"object\": \"Axis\"},\n {\"subject\": \"Chain links\", \"predicate\": \"uses\", \"object\": \"Tessellation\"},\n {\"subject\": \"Fabric\", \"predicate\": \"uses\", \"object\": \"Tessellation\"},\n {\"subject\": \"Chain links\", \"predicate\": \"can form\", \"object\": \"Mesh\"},\n {\"subject\": \"Wedging Telescope\", \"predicate\": \"removes need for\", \"object\": \"Guide pin\"},\n {\"subject\": \"Wedging Telescope\", \"predicate\": \"wedges itself in\", \"object\": \"Open position\"}\n ]\n}\n```",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Hinge\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Horizontal Hinge\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Vertical Pin/Hinge\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Pin\", \"type\": \"Component Part\"},\n {\"name\": \"Central Axis\", \"type\": \"Component Part\"},\n {\"name\": \"Knuckles\", \"type\": \"Component Part\"},\n {\"name\": \"Overhangs\", \"type\": \"3D Printing Feature\"},\n {\"name\": \"PLA\", \"type\": \"Material\"},\n {\"name\": \"Ball in Socket\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Ball\", \"type\": \"Component Part\"},\n {\"name\": \"Socket\", \"type\": \"Component Part\"},\n {\"name\": \"Bridges\", \"type\": \"3D Printing Feature\"},\n {\"name\": \"Gyroscopic Effect\", \"type\": \"Mechanical Property\"},\n {\"name\": \"Screw\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Ridge\", \"type\": \"Component Part\"},\n {\"name\": \"Groove\", \"type\": \"Component Part\"},\n {\"name\": \"Helix\", \"type\": \"Geometric Pattern\"},\n {\"name\": \"Chain Links\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Fabric\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Tessellation\", \"type\": \"Geometric Pattern\"},\n {\"name\": \"Mesh\", \"type\": \"Structure\"},\n {\"name\": \"Build Plate\", \"type\": \"3D Printing Equipment\"},\n {\"name\": \"Nesting\", \"type\": \"Assembly Method\"},\n {\"name\": \"Telescoping\", \"type\": \"Assembly Method\"},\n {\"name\": \"Straight Telescope\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Wedging Telescope\", \"type\": \"Mechanical Component\"},\n {\"name\": \"Guide Pin\", \"type\": \"Component Part\"},\n {\"name\": \"3D Printing\", \"type\": \"Manufacturing Process\"}\n ],\n \"edges\": [\n {\"subject\": \"Hinge\", \"predicate\": \"allows for\", \"object\": \"Rotation\"},\n {\"subject\": \"Hinge\", \"predicate\": \"comprises\", \"object\": \"Pin\"},\n {\"subject\": \"Hinge\", 
\"predicate\": \"comprises\", \"object\": \"Central Axis\"},\n {\"subject\": \"Hinge\", \"predicate\": \"comprises\", \"object\": \"Knuckles\"},\n {\"subject\": \"Horizontal Hinge\", \"predicate\": \"is a type of\", \"object\": \"Hinge\"},\n {\"subject\": \"Horizontal Hinge\", \"predicate\": \"uses\", \"object\": \"Overhangs\"},\n {\"subject\": \"Vertical Pin/Hinge\", \"predicate\": \"is a type of\", \"object\": \"Hinge\"},\n {\"subject\": \"Vertical Pin/Hinge\", \"predicate\": \"can be printed with\", \"object\": \"PLA\"},\n {\"subject\": \"Vertical Pin/Hinge\", \"predicate\": \"takes advantage of\", \"object\": \"Overhangs\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"takes advantage of\", \"object\": \"Overhangs\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"takes advantage of\", \"object\": \"Bridges\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"comprises\", \"object\": \"Ball\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"comprises\", \"object\": \"Socket\"},\n {\"subject\": \"Ball\", \"predicate\": \"rolls inside\", \"object\": \"Socket\"},\n {\"subject\": \"Ball in Socket\", \"predicate\": \"can produce\", \"object\": \"Gyroscopic Effect\"},\n {\"subject\": \"Screw\", \"predicate\": \"comprises\", \"object\": \"Ridge\"},\n {\"subject\": \"Screw\", \"predicate\": \"comprises\", \"object\": \"Groove\"},\n {\"subject\": \"Ridge\", \"predicate\": \"travels helically along\", \"object\": \"Axis\"},\n {\"subject\": \"Chain Links\", \"predicate\": \"uses\", \"object\": \"Tessellation\"},\n {\"subject\": \"Fabric\", \"predicate\": \"uses\", \"object\": \"Tessellation\"},\n {\"subject\": \"Chain Links\", \"predicate\": \"can form\", \"object\": \"Mesh\"},\n {\"subject\": \"Telescoping\", \"predicate\": \"is a type of\", \"object\": \"Assembly Method\"},\n {\"subject\": \"Nesting\", \"predicate\": \"is a type of\", \"object\": \"Assembly Method\"},\n {\"subject\": \"Straight Telescope\", \"predicate\": \"is a type of\", 
\"object\": \"Telescoping\"},\n {\"subject\": \"Wedging Telescope\", \"predicate\": \"is a type of\", \"object\": \"Telescoping\"},\n {\"subject\": \"Wedging Telescope\", \"predicate\": \"removes need for\", \"object\": \"Guide Pin\"},\n {\"subject\": \"Wedging Telescope\", \"predicate\": \"wedges itself in\", \"object\": \"Open Position\"},\n {\"subject\": \"Hinge\", \"predicate\": \"can be created with\", \"object\": \"3D Printing\"},\n {\"subject\": \"Ball in Socket\", \"predi",
"metadata_b": {
"language": "en",
"char_length": 450,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "The document describes various methods for creating mechanical components such as hinges, screws, and telescopes using 3D printing."
}
},
{
"source": "02_Point of Curves.docx",
"size_bucket": "medium",
"doc_chars": 1338,
"metrics_a": {
"n_entities": 28,
"n_edges": 33,
"predicate_diversity": 15,
"type_diversity": 10,
"avg_degree": 2.36,
"largest_component": 25,
"largest_component_pct": 89.3
},
"metrics_b": {
"n_entities": 31,
"n_edges": 41,
"predicate_diversity": 21,
"type_diversity": 6,
"avg_degree": 2.65,
"largest_component": 28,
"largest_component_pct": 90.3
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"circle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"arc\", \"type\": \"Geometric Shape\"},\n {\"name\": \"curve\", \"type\": \"Geometric Shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Shape\"},\n {\"name\": \"solid\", \"type\": \"Geometric Shape\"},\n {\"name\": \"illustration\", \"type\": \"Process\"},\n {\"name\": \"animation\", \"type\": \"Process\"},\n {\"name\": \"manufacturing\", \"type\": \"Process\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"XYZ intersection\", \"type\": \"Coordinate System\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Shape\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Shape\"},\n {\"name\": \"helix\", \"type\": \"Geometric Shape\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open curve\", \"type\": \"Curve Type\"},\n {\"name\": \"closed curve\", \"type\": \"Curve Type\"},\n {\"name\": \"planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"non-planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"single curve\", \"type\": \"Curve Category\"},\n {\"name\": \"polycurve\", \"type\": \"Curve Category\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"are mathematical representations of\", \"object\": \"3-D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", 
\"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"XYZ intersection\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"2D geometry\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"3D geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"is a 2D geometric form in Rhino\", \"object\": \"Geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"line\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"circle\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"arc\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"polygon\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"ellipse\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"helix\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"spiral\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"open curve\"},\n {\"subject\": \"curve\", \"predicate\": 
\"can be\", \"object\": \"closed curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"planar curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"non-planar curve\"},\n {\"subject\": \"curve\", \"predicate\": \"are building blocks for\", \"object\": \"complex geometries\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"can be described as\", \"object\": \"single curve\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"can be descri",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Non-Uniform Rational B-Splines\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Form\"},\n {\"name\": \"circle\", \"type\": \"Geometric Form\"},\n {\"name\": \"arc\", \"type\": \"Geometric Form\"},\n {\"name\": \"curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Form\"},\n {\"name\": \"solid\", \"type\": \"Geometric Form\"},\n {\"name\": \"illustration\", \"type\": \"Application Domain\"},\n {\"name\": \"animation\", \"type\": \"Application Domain\"},\n {\"name\": \"manufacturing\", \"type\": \"Application Domain\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"area\", \"type\": \"Geometric Property\"},\n {\"name\": \"volume\", \"type\": \"Geometric Property\"},\n {\"name\": \"XYZ intersection\", \"type\": \"Geometric Property\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"line\", \"type\": \"Geometric Form\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Form\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Form\"},\n {\"name\": \"helix\", \"type\": \"Geometric Form\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Form\"},\n {\"name\": \"open curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"closed curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"planar curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"non-planar curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"single curve\", \"type\": \"Geometric Form\"},\n {\"name\": \"polycurve\", \"type\": \"Geometric Form\"},\n {\"name\": \"end points\", \"type\": \"Geometric Property\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", 
\"predicate\": \"represents all geometry with\", \"object\": \"Non-Uniform Rational B-Splines\"},\n {\"subject\": \"NURBS\", \"predicate\": \"is abbreviation for\", \"object\": \"Non-Uniform Rational B-Splines\"},\n {\"subject\": \"NURBS\", \"predicate\": \"are mathematical representations of\", \"object\": \"3-D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have property of\", \"object\": \"flexibility\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have property of\", \"object\": \"accuracy\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"XYZ intersection\"},\n {\"subject\": \"point\", \"predicate\": \"does not have\", \"object\": \"area\"},\n {\"subject\": \"point\", \"predicate\": \"does not have\", \"object\": \"volume\"},\n {\"subject\": \"point\", \"predicate\": \"is used as\", \"object\": \"place holder\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"2D geometry\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"3D geometry\"},\n {\"subject\": 
\"curve\", \"predicate\": \"is 2D geometric form in\", \"object\": \"Rhino\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"line\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"arc\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"circle\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"polygon\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"ellipse\"},\n ",
"metadata_b": {
"language": "en",
"char_length": 289,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "The document describes the use of Non-Uniform Rational B-Splines (NURBS) in Rhino for representing and generating various geometric forms."
}
},
{
"source": "02_Point of Curves - AARON.docx",
"size_bucket": "medium",
"doc_chars": 2116,
"metrics_a": {
"n_entities": 43,
"n_edges": 55,
"predicate_diversity": 22,
"type_diversity": 13,
"avg_degree": 2.56,
"largest_component": 29,
"largest_component_pct": 67.4
},
"metrics_b": {
"n_entities": 42,
"n_edges": 58,
"predicate_diversity": 24,
"type_diversity": 14,
"avg_degree": 2.76,
"largest_component": 26,
"largest_component_pct": 61.9
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"circle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"arc\", \"type\": \"Geometric Shape\"},\n {\"name\": \"curve\", \"type\": \"Geometric Shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Shape\"},\n {\"name\": \"solid\", \"type\": \"Geometric Shape\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"3D Cartesian space\", \"type\": \"Coordinate System\"},\n {\"name\": \"X-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Y-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Z-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Shape\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Shape\"},\n {\"name\": \"helix\", \"type\": \"Geometric Shape\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open curve\", \"type\": \"Curve Type\"},\n {\"name\": \"closed curve\", \"type\": \"Curve Type\"},\n {\"name\": \"planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"non-planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"single curve\", \"type\": \"Curve Type\"},\n {\"name\": \"polycurve\", \"type\": \"Curve Type\"},\n {\"name\": \"polyline\", \"type\": \"Geometric Shape\"},\n {\"name\": \"illustration\", \"type\": \"Application\"},\n {\"name\": \"animation\", \"type\": \"Application\"},\n {\"name\": \"manufacturing\", \"type\": \"Application\"},\n {\"name\": \"Top viewport\", \"type\": \"Interface Element\"},\n 
{\"name\": \"small object template\", \"type\": \"Template\"},\n {\"name\": \"Circle layer\", \"type\": \"Layer\"},\n {\"name\": \"Star layer\", \"type\": \"Layer\"},\n {\"name\": \"Rectangle layer\", \"type\": \"Layer\"},\n {\"name\": \"Polyline layer\", \"type\": \"Layer\"},\n {\"name\": \"Curve layer\", \"type\": \"Layer\"},\n {\"name\": \"Shape layer\", \"type\": \"Layer\"},\n {\"name\": \"rounded rectangle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open polyline\", \"type\": \"Geometric Shape\"},\n {\"name\": \"closed shape\", \"type\": \"Geometric Shape\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"is a\", \"object\": \"Mathematical Representation\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have\", \"object\": \"flexibility\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have\", \"object\": \"accuracy\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents a position in\", \"object\": \"3D Cartesian space\"},\n 
{\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"X-axis\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Y-axis\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Z-axis\"},\n {\"subject\": \"points\", \"predicate\": \"are building blocks that define\", \"object\": \"2D geometry\"},\n {\"subject\": \"points\", \"predicate\": \"are building block",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Geometry\", \"type\": \"concept\"},\n {\"name\": \"Rhino\", \"type\": \"software\"},\n {\"name\": \"Non-Uniform Rational B-Splines\", \"type\": \"mathematical_concept\"},\n {\"name\": \"NURBS\", \"type\": \"mathematical_concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"geometric_concept\"},\n {\"name\": \"2-D line\", \"type\": \"geometric_shape\"},\n {\"name\": \"circle\", \"type\": \"geometric_shape\"},\n {\"name\": \"arc\", \"type\": \"geometric_shape\"},\n {\"name\": \"curve\", \"type\": \"geometric_shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"geometric_shape\"},\n {\"name\": \"solid\", \"type\": \"geometric_shape\"},\n {\"name\": \"point\", \"type\": \"geometric_element\"},\n {\"name\": \"3D Cartesian space\", \"type\": \"coordinate_system\"},\n {\"name\": \"X-axis\", \"type\": \"coordinate_axis\"},\n {\"name\": \"Y-axis\", \"type\": \"coordinate_axis\"},\n {\"name\": \"Z-axis\", \"type\": \"coordinate_axis\"},\n {\"name\": \"2D geometry\", \"type\": \"geometric_concept\"},\n {\"name\": \"3D geometry\", \"type\": \"geometric_concept\"},\n {\"name\": \"line\", \"type\": \"geometric_shape\"},\n {\"name\": \"polygon\", \"type\": \"geometric_shape\"},\n {\"name\": \"ellipse\", \"type\": \"geometric_shape\"},\n {\"name\": \"helix\", \"type\": \"geometric_shape\"},\n {\"name\": \"spiral\", \"type\": \"geometric_shape\"},\n {\"name\": \"open curve\", \"type\": \"curve_type\"},\n {\"name\": \"closed curve\", \"type\": \"curve_type\"},\n {\"name\": \"planar curve\", \"type\": \"curve_type\"},\n {\"name\": \"non-planar curve\", \"type\": \"curve_type\"},\n {\"name\": \"single curve\", \"type\": \"curve_type\"},\n {\"name\": \"polycurve\", \"type\": \"curve_type\"},\n {\"name\": \"polyline\", \"type\": \"geometric_shape\"},\n {\"name\": \"illustration\", \"type\": \"application_domain\"},\n {\"name\": \"animation\", \"type\": \"application_domain\"},\n {\"name\": \"manufacturing\", 
\"type\": \"application_domain\"},\n {\"name\": \"small object template\", \"type\": \"template\"},\n {\"name\": \"Top viewport\", \"type\": \"viewport\"},\n {\"name\": \"layer\", \"type\": \"organizational_element\"},\n {\"name\": \"Star\", \"type\": \"geometric_shape\"},\n {\"name\": \"Rectangle\", \"type\": \"geometric_shape\"},\n {\"name\": \"rounded corners\", \"type\": \"geometric_feature\"},\n {\"name\": \"Open Poly-line\", \"type\": \"geometric_shape\"},\n {\"name\": \"Closed Curve\", \"type\": \"geometric_shape\"},\n {\"name\": \"Shape\", \"type\": \"geometric_concept\"}\n ],\n \"edges\": [\n {\"subject\": \"Geometry\", \"predicate\": \"is represented with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"is also known as\", \"object\": \"Non-Uniform Rational B-Splines\"},\n {\"subject\": \"NURBS\", \"predicate\": \"are mathematical representations of\", \"object\": \"3-D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have property of\", \"object\": \"flexibility\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have property of\", \"object\": \"accuracy\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most 
basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"position in 3D Cartesian space\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as\", \"object\": \"coordinates\"},\n {\"subject\": \"coordinates\", \"predicate\": \"are along\", \"object\": \"X-axis\"},\n {\"subject\": \"coordinates\", \"predicate\": \"are along\", \"object\": \"Y-axis\"},\n {\"subject\": \"coordinates\", \"predicate\": \"are",
"metadata_b": {
"language": "en",
"char_length": 320,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Document describes the basics of Geometry in Rhino software"
}
},
{
"source": "2019-2020 Research and Creative Projects Awards Guidelines.FINAL.pdf",
"size_bucket": "large",
"doc_chars": 12000,
"metrics_a": {
"n_entities": 84,
"n_edges": 129,
"predicate_diversity": 72,
"type_diversity": 20,
"avg_degree": 3.07,
"largest_component": 7,
"largest_component_pct": 8.3
},
"metrics_b": {
"n_entities": 79,
"n_edges": 98,
"predicate_diversity": 55,
"type_diversity": 26,
"avg_degree": 2.48,
"largest_component": 12,
"largest_component_pct": 15.2
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Research and Creative Projects Awards Program\", \"type\": \"Program\"},\n {\"name\": \"Faculty\", \"type\": \"Role\"},\n {\"name\": \"Division of Academic Affairs\", \"type\": \"Organization\"},\n {\"name\": \"Committee on Research, Awards and Leaves (CRAL)\", \"type\": \"Committee\"},\n {\"name\": \"Provost\", \"type\": \"Role\"},\n {\"name\": \"Office of Sponsored Programs\", \"type\": \"Organization\"},\n {\"name\": \"Carrie Corti\", \"type\": \"Person\"},\n {\"name\": \"Department Chair\", \"type\": \"Role\"},\n {\"name\": \"Dean\", \"type\": \"Role\"},\n {\"name\": \"Human Research Ethics Board (HREB)\", \"type\": \"Committee\"},\n {\"name\": \"Office of Academic Affairs\", \"type\": \"Organization\"},\n {\"name\": \"Student\", \"type\": \"Role\"},\n {\"name\": \"Tenure-track teaching faculty\", \"type\": \"Role\"},\n {\"name\": \"Term lecturers\", \"type\": \"Role\"},\n {\"name\": \"Tenured faculty\", \"type\": \"Role\"},\n {\"name\": \"Non-tenured tenure-track teaching faculty\", \"type\": \"Role\"},\n {\"name\": \"Payroll Department\", \"type\": \"Organization\"},\n {\"name\": \"IMS\", \"type\": \"Organization\"},\n {\"name\": \"IT\", \"type\": \"Organization\"},\n {\"name\": \"State of New York\", \"type\": \"Organization\"},\n {\"name\": \"Library\", \"type\": \"Organization\"},\n {\"name\": \"State University of New York at New Paltz\", \"type\": \"Organization\"},\n {\"name\": \"April 1, 2019\", \"type\": \"Date\"},\n {\"name\": \"March 31, 2020\", \"type\": \"Date\"},\n {\"name\": \"November 30, 2018\", \"type\": \"Date\"},\n {\"name\": \"December 21, 2018\", \"type\": \"Date\"},\n {\"name\": \"January 11, 2019\", \"type\": \"Date\"},\n {\"name\": \"January 18, 2019\", \"type\": \"Date\"},\n {\"name\": \"February 15, 2019\", \"type\": \"Date\"},\n {\"name\": \"March 1, 2019\", \"type\": \"Date\"},\n {\"name\": \"$2,500\", \"type\": \"Amount\"},\n {\"name\": \"$11.10 per hour\", \"type\": 
\"Rate\"},\n {\"name\": \"$0.54 per mile\", \"type\": \"Rate\"},\n {\"name\": \"Faculty research\", \"type\": \"Activity\"},\n {\"name\": \"Creative activity\", \"type\": \"Activity\"},\n {\"name\": \"Student learning\", \"type\": \"Concept\"},\n {\"name\": \"Faculty scholarship\", \"type\": \"Concept\"},\n {\"name\": \"New knowledge\", \"type\": \"Concept\"},\n {\"name\": \"Educational experience\", \"type\": \"Concept\"},\n {\"name\": \"External funding\", \"type\": \"Concept\"},\n {\"name\": \"InfoEd profile\", \"type\": \"Tool\"},\n {\"name\": \"Project narrative\", \"type\": \"Document\"},\n {\"name\": \"Cover sheet\", \"type\": \"Document\"},\n {\"name\": \"Budget\", \"type\": \"Document\"},\n {\"name\": \"Vitae\", \"type\": \"Document\"},\n {\"name\": \"Final report\", \"type\": \"Document\"},\n {\"name\": \"Evaluation criteria\", \"type\": \"Concept\"},\n {\"name\": \"Contribution\", \"type\": \"Criterion\"},\n {\"name\": \"Dissemination\", \"type\": \"Criterion\"},\n {\"name\": \"Methodology\", \"type\": \"Criterion\"},\n {\"name\": \"Capability\", \"type\": \"Criterion\"},\n {\"name\": \"Budget justification\", \"type\": \"Criterion\"},\n {\"name\": \"Tenured/permanent status\", \"type\": \"Criterion\"},\n {\"name\": \"Student Assistance\", \"type\": \"Budget Category\"},\n {\"name\": \"Travel\", \"type\": \"Budget Category\"},\n {\"name\": \"Supplies and materials\", \"type\": \"Budget Category\"},\n {\"name\": \"Other\", \"type\": \"Budget Category\"},\n {\"name\": \"Laboratory supplies\", \"type\": \"Item\"},\n {\"name\": \"Books and journals\", \"type\": \"Item\"},\n {\"name\": \"Artist supplies\", \"type\": \"Item\"},\n {\"name\": \"Office supplies\", \"type\": \"Item\"},\n {\"name\": \"Computer supplies\", \"type\": \"Item\"},\n {\"name\": \"Software\", \"type\": \"Item\"},\n {\"name\": \"Equipment\", \"type\": \"Item\"},\n {\"name\": \"Tape recorders\", \"type\": \"Equipment\"},\n {\"name\": \"Cameras\", \"type\": \"Equipment\"},\n {\"name\": 
\"Video recorders\", \"type\": \"Equipment\"},\n {\"name\": \"Duplicating costs\", \"type\": \"Expense\"},\n {\"name\": \"Postage\", \"type\": \"Expense\"},\n {\"name\": \"Publication related costs\", \"type\": \"Expense\"},\n {\"name\": \"Computer data searches\", \"type\": \"Expense\"},\n {\"name\": \"Human subjects\", \"type\": \"Concept\"},\n {\"name\": \"Human subjects training program\", \"type\": \"Program\"},\n {\"name\": \"Conference attendance\", \"type\": \"",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Research and Creative Projects Awards Program\", \"type\": \"Program\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Division of Academic Affairs\", \"type\": \"Organizational Unit\"},\n {\"name\": \"Committee on Research, Awards and Leaves (CRAL)\", \"type\": \"Committee\"},\n {\"name\": \"Provost\", \"type\": \"Position\"},\n {\"name\": \"Office of Sponsored Programs\", \"type\": \"Organizational Unit\"},\n {\"name\": \"Carrie Corti\", \"type\": \"Person\"},\n {\"name\": \"cortic@newpaltz.edu\", \"type\": \"Email Address\"},\n {\"name\": \"Department Chair\", \"type\": \"Position\"},\n {\"name\": \"Dean\", \"type\": \"Position\"},\n {\"name\": \"Office of Academic Affairs\", \"type\": \"Organizational Unit\"},\n {\"name\": \"researchcreative@newpaltz.edu\", \"type\": \"Email Address\"},\n {\"name\": \"Human Research Ethics Board (HREB)\", \"type\": \"Committee\"},\n {\"name\": \"InfoEd\", \"type\": \"System\"},\n {\"name\": \"IMS\", \"type\": \"Organizational Unit\"},\n {\"name\": \"IT\", \"type\": \"Organizational Unit\"},\n {\"name\": \"Payroll Department\", \"type\": \"Organizational Unit\"},\n {\"name\": \"Library\", \"type\": \"Organizational Unit\"},\n {\"name\": \"State of New York\", \"type\": \"Government Entity\"},\n {\"name\": \"2019-2020\", \"type\": \"Academic Year\"},\n {\"name\": \"April 1, 2019\", \"type\": \"Date\"},\n {\"name\": \"March 31, 2020\", \"type\": \"Date\"},\n {\"name\": \"November 30, 2018\", \"type\": \"Date\"},\n {\"name\": \"December 21, 2018\", \"type\": \"Date\"},\n {\"name\": \"January 11, 2019\", \"type\": \"Date\"},\n {\"name\": \"January 18, 2019\", \"type\": \"Date\"},\n {\"name\": \"February 15, 2019\", \"type\": \"Date\"},\n {\"name\": \"March 1, 2019\", \"type\": \"Date\"},\n {\"name\": \"Full-time tenure-track teaching faculty\", \"type\": \"Eligible Applicant\"},\n {\"name\": \"Full-time term lecturers\", \"type\": \"Eligible 
Applicant\"},\n {\"name\": \"$2,500\", \"type\": \"Monetary Amount\"},\n {\"name\": \"$11.10 per hour\", \"type\": \"Wage Rate\"},\n {\"name\": \"$0.54 per mile\", \"type\": \"Mileage Rate\"},\n {\"name\": \"x3281\", \"type\": \"Phone Extension\"},\n {\"name\": \"x3282\", \"type\": \"Phone Extension\"},\n {\"name\": \"Project narrative\", \"type\": \"Document Component\"},\n {\"name\": \"Cover sheet\", \"type\": \"Document Component\"},\n {\"name\": \"Budget\", \"type\": \"Document Component\"},\n {\"name\": \"Current Vitae\", \"type\": \"Document Component\"},\n {\"name\": \"Department chair evaluation\", \"type\": \"Document Component\"},\n {\"name\": \"Dean evaluation\", \"type\": \"Document Component\"},\n {\"name\": \"Student Assistant\", \"type\": \"Position\"},\n {\"name\": \"Contribution criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Dissemination criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Methodology criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Capability criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Budget criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Tenured/permanent criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"HREB criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Previous support criterion\", \"type\": \"Evaluation Criterion\"},\n {\"name\": \"Faculty research\", \"type\": \"Activity\"},\n {\"name\": \"Creative activity\", \"type\": \"Activity\"},\n {\"name\": \"Student learning\", \"type\": \"Outcome\"},\n {\"name\": \"New knowledge\", \"type\": \"Outcome\"},\n {\"name\": \"Educational experience\", \"type\": \"Outcome\"},\n {\"name\": \"Personnel costs\", \"type\": \"Budget Category\"},\n {\"name\": \"Travel costs\", \"type\": \"Budget Category\"},\n {\"name\": \"Supplies and materials\", \"type\": \"Budget Category\"},\n {\"name\": \"Other costs\", \"type\": \"Budget Category\"},\n {\"name\": \"Laboratory supplies\", 
\"type\": \"Eligible Expense\"},\n {\"name\": \"Books and journals\", \"type\": \"Eligible Expense\"},\n {\"name\": \"Artist supplies\", \"type\": \"Eligible Expense\"},\n {\"name\": \"Office supplies\", \"type\": \"Eligible Expense\"},\n {\"name\": \"Computer supplies and software\", \"type\": \"Eligible Expense\"},\n {\"name\": \"Small equipment items\", \"type\": \"Eligible Expense\"},\n {\"name\": \"Duplica",
"metadata_b": {
"language": "en",
"char_length": 1420,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "administrative",
"one_sentence_summary": "Document outlines guidelines for a research and creative projects awards program at SUNY New Paltz"
}
},
{
"source": "2021 - DDF 320 - Design Intents Syllabus.pdf",
"size_bucket": "large",
"doc_chars": 12000,
"metrics_a": {
"n_entities": 57,
"n_edges": 51,
"predicate_diversity": 29,
"type_diversity": 23,
"avg_degree": 1.79,
"largest_component": 18,
"largest_component_pct": 31.6
},
"metrics_b": {
"n_entities": 142,
"n_edges": 175,
"predicate_diversity": 57,
"type_diversity": 33,
"avg_degree": 2.46,
"largest_component": 84,
"largest_component_pct": 59.2
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Design Intents DDF 320-01\", \"type\": \"Course\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Instructor\"},\n {\"name\": \"Spring 2021\", \"type\": \"Semester\"},\n {\"name\": \"EIH 104\", \"type\": \"Location\"},\n {\"name\": \"HVAMC\", \"type\": \"Building\"},\n {\"name\": \"nelsona@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"human-centered design\", \"type\": \"Design Principle\"},\n {\"name\": \"prototype driven design practice\", \"type\": \"Design Principle\"},\n {\"name\": \"mindfulness of process\", \"type\": \"Design Principle\"},\n {\"name\": \"design processes\", \"type\": \"Topic\"},\n {\"name\": \"innovation methodologies\", \"type\": \"Topic\"},\n {\"name\": \"need finding\", \"type\": \"Topic\"},\n {\"name\": \"human factors\", \"type\": \"Topic\"},\n {\"name\": \"visualization\", \"type\": \"Topic\"},\n {\"name\": \"rapid prototyping\", \"type\": \"Topic\"},\n {\"name\": \"team dynamics\", \"type\": \"Topic\"},\n {\"name\": \"storytelling\", \"type\": \"Topic\"},\n {\"name\": \"project leadership\", \"type\": \"Topic\"},\n {\"name\": \"design research\", \"type\": \"Skill\"},\n {\"name\": \"iterative ideation\", \"type\": \"Skill\"},\n {\"name\": \"presentation\", \"type\": \"Skill\"},\n {\"name\": \"DDF minor\", \"type\": \"Academic Program\"},\n {\"name\": \"Homeworks\", \"type\": \"Assignment\"},\n {\"name\": \"Reading Responses\", \"type\": \"Assignment\"},\n {\"name\": \"Group Presentations\", \"type\": \"Assignment\"},\n {\"name\": \"Quizzes\", \"type\": \"Assignment\"},\n {\"name\": \"Class Discussion Participation\", \"type\": \"Assignment\"},\n {\"name\": \"Midterm Presentation\", \"type\": \"Assignment\"},\n {\"name\": \"In Progress Design Review\", \"type\": \"Assignment\"},\n {\"name\": \"Final Prototype Presentations\", \"type\": \"Assignment\"},\n {\"name\": \"Blackboard\", \"type\": \"Platform\"},\n {\"name\": \"Don Norman\", \"type\": \"Author\"},\n {\"name\": 
\"Design Thinking\", \"type\": \"Concept\"},\n {\"name\": \"Dieter Rams\", \"type\": \"Author\"},\n {\"name\": \"Principals for Good Design\", \"type\": \"Concept\"},\n {\"name\": \"IDEO\", \"type\": \"Organization\"},\n {\"name\": \"10 Step Design Process\", \"type\": \"Framework\"},\n {\"name\": \"SELUX\", \"type\": \"Client\"},\n {\"name\": \"Problem Statement\", \"type\": \"Deliverable\"},\n {\"name\": \"Stakeholder Maps\", \"type\": \"Deliverable\"},\n {\"name\": \"Storyboards\", \"type\": \"Deliverable\"},\n {\"name\": \"Mockups\", \"type\": \"Deliverable\"},\n {\"name\": \"Pitch\", \"type\": \"Deliverable\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Student Union\", \"type\": \"Location\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"Jean Vizvary\", \"type\": \"Person\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Institution\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Student Conduct Office\", \"type\": \"Institution\"},\n {\"name\": \"March 29th\", \"type\": \"Date\"},\n {\"name\": \"April 19-May 3\", \"type\": \"Date Range\"},\n {\"name\": \"May 7\", \"type\": \"Date\"},\n {\"name\": \"academic integrity\", \"type\": \"Policy\"},\n {\"name\": \"attendance policy\", \"type\": \"Policy\"},\n {\"name\": \"public health policy\", \"type\": \"Policy\"}\n ],\n \"edges\": [\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"is taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"is offered in\", \"object\": \"Spring 2021\"},\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"is held at\", \"object\": \"EIH 104\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has office hours\", \"object\": \"M-R 10am \u2013 12pm\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"can be 
contacted at\", \"object\": \"nelsona@newpaltz.edu\"},\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"introduces\", \"object\": \"collaborative team research\"},\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"introduces\", \"object\": \"interdisciplinary practices\"},\n {\"subject\": \"Design Intents DDF 320-01\", \"predicate\": \"covers\", \"object\": \"human-centered design\"},\n {\"subject\": \"Design Intents DDF 32",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Design Intents\", \"type\": \"course\"},\n {\"name\": \"DDF 320-01\", \"type\": \"course_code\"},\n {\"name\": \"Spring 2021\", \"type\": \"semester\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"person\"},\n {\"name\": \"EIH 104\", \"type\": \"location\"},\n {\"name\": \"HVAMC\", \"type\": \"location\"},\n {\"name\": \"nelsona@newpaltz.edu\", \"type\": \"email\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"institution\"},\n {\"name\": \"human-centered design\", \"type\": \"design_concept\"},\n {\"name\": \"prototype driven design practice\", \"type\": \"design_concept\"},\n {\"name\": \"fail fast\", \"type\": \"design_concept\"},\n {\"name\": \"mindfulness of process\", \"type\": \"design_concept\"},\n {\"name\": \"design processes\", \"type\": \"topic\"},\n {\"name\": \"innovation methodologies\", \"type\": \"topic\"},\n {\"name\": \"need finding\", \"type\": \"topic\"},\n {\"name\": \"human factors\", \"type\": \"topic\"},\n {\"name\": \"visualization\", \"type\": \"topic\"},\n {\"name\": \"rapid prototyping\", \"type\": \"topic\"},\n {\"name\": \"team dynamics\", \"type\": \"topic\"},\n {\"name\": \"storytelling\", \"type\": \"topic\"},\n {\"name\": \"project leadership\", \"type\": \"topic\"},\n {\"name\": \"design studio\", \"type\": \"course_format\"},\n {\"name\": \"design research\", \"type\": \"skill\"},\n {\"name\": \"iterative ideation\", \"type\": \"skill\"},\n {\"name\": \"presentation\", \"type\": \"skill\"},\n {\"name\": \"DDF minor\", \"type\": \"academic_program\"},\n {\"name\": \"Blackboard\", \"type\": \"platform\"},\n {\"name\": \"Homeworks\", \"type\": \"assignment_type\"},\n {\"name\": \"Reading Responses\", \"type\": \"assignment_type\"},\n {\"name\": \"Group Presentations\", \"type\": \"assignment_type\"},\n {\"name\": \"Quizzes\", \"type\": \"assignment_type\"},\n {\"name\": \"Class Discussion\", \"type\": \"assignment_type\"},\n {\"name\": \"Midterm Presentation\", \"type\": 
\"assignment_type\"},\n {\"name\": \"In Progress Design Review\", \"type\": \"assignment_type\"},\n {\"name\": \"Final Prototype Presentations\", \"type\": \"assignment_type\"},\n {\"name\": \"Selux\", \"type\": \"client\"},\n {\"name\": \"March 29th\", \"type\": \"date\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"institution\"},\n {\"name\": \"Student Union\", \"type\": \"location\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"person\"},\n {\"name\": \"knappd@newpaltz.edu\", \"type\": \"email\"},\n {\"name\": \"Jean Vizvary\", \"type\": \"person\"},\n {\"name\": \"vizvaryj@newpaltz.edu\", \"type\": \"email\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"institution\"},\n {\"name\": \"OVMS\", \"type\": \"institution\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"assessment\"},\n {\"name\": \"SEI\", \"type\": \"assessment\"},\n {\"name\": \"April 19-May 3\", \"type\": \"date_range\"},\n {\"name\": \"Don Norman\", \"type\": \"person\"},\n {\"name\": \"Design Thinking\", \"type\": \"topic\"},\n {\"name\": \"Dieter Rams\", \"type\": \"person\"},\n {\"name\": \"Principals for Good Design\", \"type\": \"design_concept\"},\n {\"name\": \"IDEO\", \"type\": \"organization\"},\n {\"name\": \"10 Step Design Process\", \"type\": \"framework\"},\n {\"name\": \"Frameworks for Design\", \"type\": \"topic\"},\n {\"name\": \"Problem Statement\", \"type\": \"deliverable\"},\n {\"name\": \"Brainstorming Sessions\", \"type\": \"activity\"},\n {\"name\": \"Stakeholder Maps\", \"type\": \"deliverable\"},\n {\"name\": \"Concept Generation\", \"type\": \"activity\"},\n {\"name\": \"Storyboards\", \"type\": \"deliverable\"},\n {\"name\": \"Mockups\", \"type\": \"deliverable\"},\n {\"name\": \"Pitch\", \"type\": \"deliverable\"},\n {\"name\": \"Group Assessment 1\", \"type\": \"assessment\"},\n {\"name\": \"Group Assessment 2\", \"type\": \"assessment\"},\n {\"name\": \"Group Assessment 3\", \"type\": \"assessment\"},\n 
{\"name\": \"Sojourner Truth Library\", \"type\": \"institution\"},\n {\"name\": \"Student Conduct Office\", \"type\": \"institution\"},\n {\"name\": \"Computer Services\", \"type\": \"institution\"},\n {\"name\": \"Acceptable Uses and Privacy Policy\", \"type\": \"policy\"},\n {\"name\": \"Academic Integrity Policy\", \"type\": \"policy\"},\n {\"name\": \"Student Conduct Code\", \"type\": \"policy\"},\n {\"name\": \"The Pledge\", \"type\": \"policy\"},\n {",
"metadata_b": {
"language": "en",
"char_length": 1824,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document outlines a course syllabus for a design-focused class in Spring 2021."
}
},
{
"source": "2016 - DDF 205 - CAD I Syllabus.pdf",
"size_bucket": "large",
"doc_chars": 11012,
"metrics_a": {
"n_entities": 23,
"n_edges": 21,
"predicate_diversity": 15,
"type_diversity": 13,
"avg_degree": 1.83,
"largest_component": 18,
"largest_component_pct": 78.3
},
"metrics_b": {
"n_entities": 61,
"n_edges": 51,
"predicate_diversity": 27,
"type_diversity": 24,
"avg_degree": 1.67,
"largest_component": 40,
"largest_component_pct": 65.6
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Computer Aided Design DDF 205-03\", \"type\": \"Course\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Instructor\"},\n {\"name\": \"Fall 2016\", \"type\": \"Semester\"},\n {\"name\": \"SAB 100 \u2013 HVMAC\", \"type\": \"Location\"},\n {\"name\": \"Rhino3D\", \"type\": \"Software\"},\n {\"name\": \"3D printing\", \"type\": \"Technology\"},\n {\"name\": \"NURBS geometry\", \"type\": \"Concept\"},\n {\"name\": \"CAD software\", \"type\": \"Software\"},\n {\"name\": \"digital fabrication\", \"type\": \"Skill\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"Technology\"},\n {\"name\": \"Rhino 5 Level 1 Training Guide\", \"type\": \"Textbook\"},\n {\"name\": \"Worksheets and Homework\", \"type\": \"Assessment\"},\n {\"name\": \"Project 1\", \"type\": \"Assessment\"},\n {\"name\": \"Midterm\", \"type\": \"Assessment\"},\n {\"name\": \"Project 2\", \"type\": \"Assessment\"},\n {\"name\": \"Project 3\", \"type\": \"Assessment\"},\n {\"name\": \"Attendance and Participation\", \"type\": \"Assessment\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Institution\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"Process\"}\n ],\n \"edges\": [\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"is taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"is offered in\", \"object\": \"Fall 2016\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"meets at\", \"object\": \"SAB 100 \u2013 HVMAC\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"uses software\", \"object\": \"Rhino3D\"},\n 
{\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"teaches technology\", \"object\": \"3D printing\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"covers concept\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"uses textbook\", \"object\": \"Rhino 5 Level 1 Training Guide\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"develops skill\", \"object\": \"digital fabrication\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"teaches technology\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Worksheets and Homework\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Project 1\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Midterm\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Project 2\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Project 3\"},\n {\"subject\": \"Computer Aided Design DDF 205-03\", \"predicate\": \"assesses through\", \"object\": \"Attendance and Participation\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has office hours\", \"object\": \"M-R 11am \u2013 12pm\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"email\", \"object\": \"nelsona@newpaltz.edu\"},\n {\"subject\": \"Disability Resource Center\", \"predicate\": \"provides accommodations for\", \"object\": \"Computer Aided Design DDF 205-03\"},\n {\"subject\": \"Deanna Knapp\", \"predicate\": \"works at\", \"object\": \"Disability Resource Center\"},\n {\"subject\": \"Sojourner Truth Library\", \"predicate\": \"provides resources on\", \"object\": \"plagiarism\"},\n {\"subject\": \"SUNY New Paltz\", 
\"predicate\": \"operates\", \"object\": \"Office of Veteran and Military Services\"}\n ]\n}\n```",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Computer Aided Design\", \"type\": \"Course\"},\n {\"name\": \"DDF 205-03\", \"type\": \"Course Code\"},\n {\"name\": \"Fall 2016\", \"type\": \"Semester\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Person\"},\n {\"name\": \"SAB 100\", \"type\": \"Location\"},\n {\"name\": \"HVMAC\", \"type\": \"Location\"},\n {\"name\": \"nelsona@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"Rhino3D\", \"type\": \"Software\"},\n {\"name\": \"CAD software\", \"type\": \"Technology\"},\n {\"name\": \"NURBS geometry\", \"type\": \"Concept\"},\n {\"name\": \"3D printing\", \"type\": \"Technology\"},\n {\"name\": \"Rhino 5 Level 1 Training Guide\", \"type\": \"Textbook\"},\n {\"name\": \"digital fabrication software\", \"type\": \"Technology\"},\n {\"name\": \"digital fabrication hardware\", \"type\": \"Technology\"},\n {\"name\": \"spatial awareness\", \"type\": \"Learning Outcome\"},\n {\"name\": \"scales\", \"type\": \"Concept\"},\n {\"name\": \"measuring devices\", \"type\": \"Tool\"},\n {\"name\": \"three dimensional form\", \"type\": \"Concept\"},\n {\"name\": \"digital modeling\", \"type\": \"Concept\"},\n {\"name\": \"three dimensional design principals\", \"type\": \"Concept\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"Technology\"},\n {\"name\": \"tolerances\", \"type\": \"Concept\"},\n {\"name\": \"critical thinking\", \"type\": \"Skill\"},\n {\"name\": \"problem solving skills\", \"type\": \"Skill\"},\n {\"name\": \"Worksheets and Homework\", \"type\": \"Assessment\"},\n {\"name\": \"Project 1\", \"type\": \"Assessment\"},\n {\"name\": \"Midterm\", \"type\": \"Assessment\"},\n {\"name\": \"Project 2\", \"type\": \"Assessment\"},\n {\"name\": \"Project 3\", \"type\": \"Assessment\"},\n {\"name\": \"Attendance and Participation\", \"type\": \"Assessment\"},\n {\"name\": \"Grade A\", \"type\": \"Grade\"},\n {\"name\": \"Grade B\", \"type\": \"Grade\"},\n {\"name\": \"Grade C\", \"type\": 
\"Grade\"},\n {\"name\": \"Grade D\", \"type\": \"Grade\"},\n {\"name\": \"Grade F\", \"type\": \"Grade\"},\n {\"name\": \"November 4th\", \"type\": \"Date\"},\n {\"name\": \"Academic Integrity\", \"type\": \"Policy\"},\n {\"name\": \"Cheating\", \"type\": \"Violation\"},\n {\"name\": \"Forgery\", \"type\": \"Violation\"},\n {\"name\": \"Plagiarism\", \"type\": \"Violation\"},\n {\"name\": \"New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Organization\"},\n {\"name\": \"Student Union\", \"type\": \"Location\"},\n {\"name\": \"Room 210\", \"type\": \"Location\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"knappd@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"Accommodation Memo\", \"type\": \"Document\"},\n {\"name\": \"Computer Services\", \"type\": \"Organization\"},\n {\"name\": \"Acceptable Uses and Privacy Policy\", \"type\": \"Policy\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"Assessment\"},\n {\"name\": \"SEI\", \"type\": \"Assessment\"},\n {\"name\": \"November 30\u2013December 10\", \"type\": \"Date Range\"},\n {\"name\": \"April 21-May 5\", \"type\": \"Date Range\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Organization\"},\n {\"name\": \"OVMS\", \"type\": \"Organization\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Organization\"},\n {\"name\": \"M-R 9:30 \u2013 10:45 am\", \"type\": \"Schedule\"},\n {\"name\": \"M-R 11am \u2013 12pm\", \"type\": \"Schedule\"},\n {\"name\": \"3 Credit Hours\", \"type\": \"Credit\"},\n {\"name\": \"rapid manufacturing\", \"type\": \"Concept\"}\n ],\n \"edges\": [\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"has course code\", \"object\": \"DDF 205-03\"},\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"offered in\", \"object\": \"Fall 2016\"},\n {\"subject\": \"Computer Aided Design\", 
\"predicate\": \"taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"meets at location\", \"object\": \"SAB 100\"},\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"meets at location\", \"object\": \"HVMAC\"},\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"has schedule\", \"object\": \"M-R 9:30 \u2013 10:45 am\"},\n {\"subject\": \"Computer Aided Design\", \"predicate\": \"worth\", \"object\": \"3 Credit Hours\"},\n ",
"metadata_b": {
"language": "en",
"char_length": 962,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document outlines a Computer Aided Design course for Fall 2016, including objectives, student learning outcomes, grading, attendance policy, and resources."
}
},
{
"source": "04_ALL_Materials and Their Properties_v3.pptx",
"size_bucket": "large",
"doc_chars": 12000,
"metrics_a": {
"n_entities": 57,
"n_edges": 81,
"predicate_diversity": 30,
"type_diversity": 15,
"avg_degree": 2.84,
"largest_component": 8,
"largest_component_pct": 14.0
},
"metrics_b": {
"n_entities": 130,
"n_edges": 169,
"predicate_diversity": 56,
"type_diversity": 34,
"avg_degree": 2.6,
"largest_component": 58,
"largest_component_pct": 44.6
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"PolyJet\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"FDM\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"PLA\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS-PLUS\", \"type\": \"FDM Material\"},\n {\"name\": \"ABSM30\", \"type\": \"FDM Material\"},\n {\"name\": \"ABSI\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS M30I\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS-ESD7\", \"type\": \"FDM Material\"},\n {\"name\": \"ASA\", \"type\": \"FDM Material\"},\n {\"name\": \"NYLON 12\", \"type\": \"FDM Material\"},\n {\"name\": \"NYLON 6\", \"type\": \"FDM Material\"},\n {\"name\": \"PC-ABS\", \"type\": \"FDM Material\"},\n {\"name\": \"PC-ISO\", \"type\": \"FDM Material\"},\n {\"name\": \"ULTEM 9085\", \"type\": \"FDM Material\"},\n {\"name\": \"ULTEM 1010\", \"type\": \"FDM Material\"},\n {\"name\": \"Veros\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Rigur\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Durus\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Tango\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Agilus\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Digital ABS\", \"type\": \"PolyJet Material\"},\n {\"name\": \"VEROCLEAR FULLCURE810\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Thermal Resistance\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Tensile Strength\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Flexural Strength\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"IZOD Impact\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Coefficient of Thermal Expansion\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Electrical Properties\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Water Absorption\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Shore Hardness\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Heat Deflection Temperature\", \"type\": \"Material Property\"},\n {\"name\": 
\"Tensile Modulus\", \"type\": \"Material Property\"},\n {\"name\": \"Elongation to Break\", \"type\": \"Material Property\"},\n {\"name\": \"Toughness\", \"type\": \"Material Property\"},\n {\"name\": \"Flexural Modulus\", \"type\": \"Material Property\"},\n {\"name\": \"Static Dissipative\", \"type\": \"Electrical Property\"},\n {\"name\": \"Surface Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Volume Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Shore A\", \"type\": \"Hardness Scale\"},\n {\"name\": \"Shore D\", \"type\": \"Hardness Scale\"},\n {\"name\": \"SR Support Structure\", \"type\": \"Support Structure Type\"},\n {\"name\": \"BASS Support Structure\", \"type\": \"Support Structure Type\"},\n {\"name\": \"Materials Data Sheet\", \"type\": \"Documentation\"},\n {\"name\": \"Safety Data Sheet\", \"type\": \"Documentation\"},\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"Thermoplastic\", \"type\": \"Material Category\"},\n {\"name\": \"Rigid Opaque\", \"type\": \"PolyJet Family\"},\n {\"name\": \"Rigid Transparent\", \"type\": \"PolyJet Family\"},\n {\"name\": \"Simulated Polypropylene Family\", \"type\": \"PolyJet Family\"},\n {\"name\": \"Flexible Rubber-like Family\", \"type\": \"PolyJet Family\"},\n {\"name\": \"Digital Material\", \"type\": \"PolyJet Family\"},\n {\"name\": \"Standard FDM Materials\", \"type\": \"FDM Material Category\"},\n {\"name\": \"Engineering FDM Materials\", \"type\": \"FDM Material Category\"},\n {\"name\": \"High Performance FDM Materials\", \"type\": \"FDM Material Category\"},\n {\"name\": \"Glass Transition Temperature\", \"type\": \"Material Property\"},\n {\"name\": \"REACH\", \"type\": \"Regulation\"},\n {\"name\": \"Global Harmonized System\", \"type\": \"Standard\"}\n ],\n \"edges\": [\n {\"subject\": \"PolyJet\", \"predicate\": \"offers advantages including\", \"object\": \"Clear transparency\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"offers advantages 
including\", \"object\": \"Multi-material realism\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"offers advantages including\", \"object\": \"Smooth surface finish\"},\n {\"subject\": \"FDM\", \"predicate\": \"offers advantages including\", \"object\": \"ABS functionality\"},\n {\"subject\": \"FDM\", \"predicate\": \"offers advantages including\", \"object\": \"Economic sparse fill\"},\n {\"subject\": \"FDM\", \"p",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"PolyJet\", \"type\": \"Technology\"},\n {\"name\": \"FDM\", \"type\": \"Technology\"},\n {\"name\": \"PLA\", \"type\": \"Material\"},\n {\"name\": \"ABS-PLUS\", \"type\": \"Material\"},\n {\"name\": \"ABSM30\", \"type\": \"Material\"},\n {\"name\": \"ABSI\", \"type\": \"Material\"},\n {\"name\": \"ABS M30I\", \"type\": \"Material\"},\n {\"name\": \"ABS-ESD7\", \"type\": \"Material\"},\n {\"name\": \"ASA\", \"type\": \"Material\"},\n {\"name\": \"NYLON 12\", \"type\": \"Material\"},\n {\"name\": \"NYLON 6\", \"type\": \"Material\"},\n {\"name\": \"PC-ABS\", \"type\": \"Material\"},\n {\"name\": \"PC-ISO\", \"type\": \"Material\"},\n {\"name\": \"ULTEM 9085\", \"type\": \"Material\"},\n {\"name\": \"ULTEM 1010\", \"type\": \"Material\"},\n {\"name\": \"Veros\", \"type\": \"Material\"},\n {\"name\": \"Rigur\", \"type\": \"Material\"},\n {\"name\": \"Durus\", \"type\": \"Material\"},\n {\"name\": \"Tango\", \"type\": \"Material\"},\n {\"name\": \"Agilus\", \"type\": \"Material\"},\n {\"name\": \"Digital ABS\", \"type\": \"Material\"},\n {\"name\": \"ABSplus\", \"type\": \"Material\"},\n {\"name\": \"VEROCLEAR FULLCURE810\", \"type\": \"Material\"},\n {\"name\": \"ANTERO\", \"type\": \"Material\"},\n {\"name\": \"PC\", \"type\": \"Material\"},\n {\"name\": \"NYLON CF\", \"type\": \"Material\"},\n {\"name\": \"Thermoplastics\", \"type\": \"Material Class\"},\n {\"name\": \"Rigid Opaque\", \"type\": \"Material Family\"},\n {\"name\": \"Rigid Transparent\", \"type\": \"Material Family\"},\n {\"name\": \"Simulated Polypropylene Family\", \"type\": \"Material Family\"},\n {\"name\": \"Flexible Rubber-like Family\", \"type\": \"Material Family\"},\n {\"name\": \"Digital Material\", \"type\": \"Material Family\"},\n {\"name\": \"Standard Materials\", \"type\": \"Material Category\"},\n {\"name\": \"Engineering Materials\", \"type\": \"Material Category\"},\n 
{\"name\": \"High Performance Materials\", \"type\": \"Material Category\"},\n {\"name\": \"SR support structures\", \"type\": \"Support Structure\"},\n {\"name\": \"BASS support structures\", \"type\": \"Support Structure\"},\n {\"name\": \"Thermal Resistance\", \"type\": \"Testing Standard\"},\n {\"name\": \"Heat Deflection Temperature\", \"type\": \"Testing Standard\"},\n {\"name\": \"HDT\", \"type\": \"Testing Standard\"},\n {\"name\": \"Tensile Strength\", \"type\": \"Testing Standard\"},\n {\"name\": \"Tensile Modulus\", \"type\": \"Testing Standard\"},\n {\"name\": \"Elongation to Break\", \"type\": \"Testing Standard\"},\n {\"name\": \"Toughness\", \"type\": \"Testing Standard\"},\n {\"name\": \"Flexural Strength\", \"type\": \"Testing Standard\"},\n {\"name\": \"Flexural Modulus\", \"type\": \"Testing Standard\"},\n {\"name\": \"IZOD Impact\", \"type\": \"Testing Standard\"},\n {\"name\": \"Coefficient of Thermal Expansion\", \"type\": \"Testing Standard\"},\n {\"name\": \"CTE\", \"type\": \"Testing Standard\"},\n {\"name\": \"Electrical Properties\", \"type\": \"Testing Standard\"},\n {\"name\": \"Static Dissipative\", \"type\": \"Electrical Property\"},\n {\"name\": \"Surface Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Volume Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Water Absorption\", \"type\": \"Testing Standard\"},\n {\"name\": \"Shore Hardness\", \"type\": \"Testing Standard\"},\n {\"name\": \"Shore A\", \"type\": \"Hardness Scale\"},\n {\"name\": \"Shore D\", \"type\": \"Hardness Scale\"},\n {\"name\": \"ASTM\", \"type\": \"Standards Organization\"},\n {\"name\": \"Materials Data Sheet\", \"type\": \"Document Type\"},\n {\"name\": \"Safety Data Sheet\", \"type\": \"Document Type\"},\n {\"name\": \"SDS\", \"type\": \"Document Type\"},\n {\"name\": \"REACH\", \"type\": \"Regulation\"},\n {\"name\": \"Global Harmonized System\", \"type\": \"Standard System\"},\n {\"name\": \"GHS\", \"type\": \"Standard 
System\"},\n {\"name\": \"Glass Transition Temperature\", \"type\": \"Property\"},\n {\"name\": \"Design Series 3D Printers\", \"type\": \"Equipment\"},\n {\"name\": \"Stress-Strain Graph\", \"type\": \"Testing Method\"},\n {\"name\": \"Two-point Bending\", \"type\": \"Testing Method\"},\n {\"name\": \"Pendulum Impact Test\", \"type\": \"Testing Method\"},\n {\"name\": \"Medical Applications\", \"type\": \"Application Domain\"},\n {\"name\": \"Dental Applic",
"metadata_b": {
"language": "en",
"char_length": 3204,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "This document discusses Stratasys materials and their properties, testing standards, and applications in FDM and PolyJet technologies."
}
},
{
"source": "02_PPT_ALL_AM_Technologies_for_3DP_v3.pptx",
"size_bucket": "large",
"doc_chars": 9360,
"metrics_a": {
"n_entities": 67,
"n_edges": 128,
"predicate_diversity": 11,
"type_diversity": 9,
"avg_degree": 3.82,
"largest_component": 49,
"largest_component_pct": 73.1
},
"metrics_b": {
"n_entities": 105,
"n_edges": 158,
"predicate_diversity": 27,
"type_diversity": 11,
"avg_degree": 3.01,
"largest_component": 72,
"largest_component_pct": 68.6
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"Technology Domain\"},\n {\"name\": \"ASTM\", \"type\": \"Organization\"},\n {\"name\": \"Material Extrusion\", \"type\": \"AM Process\"},\n {\"name\": \"FDM\", \"type\": \"AM Technology\"},\n {\"name\": \"Fused Deposition Modeling\", \"type\": \"AM Technology\"},\n {\"name\": \"FFF\", \"type\": \"AM Technology\"},\n {\"name\": \"Fused Filament Fabrication\", \"type\": \"AM Technology\"},\n {\"name\": \"Vat Photopolymerization\", \"type\": \"AM Process\"},\n {\"name\": \"SL\", \"type\": \"AM Technology\"},\n {\"name\": \"SLA\", \"type\": \"AM Technology\"},\n {\"name\": \"Stereolithography\", \"type\": \"AM Technology\"},\n {\"name\": \"DLP\", \"type\": \"AM Technology\"},\n {\"name\": \"Digital Light Processing\", \"type\": \"AM Technology\"},\n {\"name\": \"3SP\", \"type\": \"AM Technology\"},\n {\"name\": \"Powder Bed Fusion\", \"type\": \"AM Process\"},\n {\"name\": \"SLS\", \"type\": \"AM Technology\"},\n {\"name\": \"Selective Laser Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"DMLS\", \"type\": \"AM Technology\"},\n {\"name\": \"Direct Metal Laser Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"EBM\", \"type\": \"AM Technology\"},\n {\"name\": \"Electron Beam Melting\", \"type\": \"AM Technology\"},\n {\"name\": \"SHS\", \"type\": \"AM Technology\"},\n {\"name\": \"Selective Heat Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"Binder Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"CJP\", \"type\": \"AM Technology\"},\n {\"name\": \"ColorJet Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"PP\", \"type\": \"AM Technology\"},\n {\"name\": \"Plaster-based 3D Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"Sheet Lamination\", \"type\": \"AM Process\"},\n {\"name\": \"UC\", \"type\": \"AM Technology\"},\n {\"name\": \"Ultrasonic Consolidation\", \"type\": \"AM Technology\"},\n {\"name\": \"LOM\", \"type\": \"AM 
Technology\"},\n {\"name\": \"Laminated Object Manufacturing\", \"type\": \"AM Technology\"},\n {\"name\": \"Material Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"MJP\", \"type\": \"AM Technology\"},\n {\"name\": \"PolyJet\", \"type\": \"AM Technology\"},\n {\"name\": \"Photopolymer Jetting\", \"type\": \"AM Technology\"},\n {\"name\": \"PJ\", \"type\": \"AM Technology\"},\n {\"name\": \"MultiJet Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"Directed Energy Deposition\", \"type\": \"AM Process\"},\n {\"name\": \"LMD\", \"type\": \"AM Technology\"},\n {\"name\": \"Laser Metal Deposition\", \"type\": \"AM Technology\"},\n {\"name\": \"Nylon\", \"type\": \"Material\"},\n {\"name\": \"Photopolymers\", \"type\": \"Material\"},\n {\"name\": \"Thermoplastics\", \"type\": \"Material\"},\n {\"name\": \"Metals\", \"type\": \"Material\"},\n {\"name\": \"UV-active photopolymers\", \"type\": \"Material\"},\n {\"name\": \"Prototypes\", \"type\": \"Application\"},\n {\"name\": \"Manufacturing Aides\", \"type\": \"Application\"},\n {\"name\": \"Small series parts\", \"type\": \"Application\"},\n {\"name\": \"Casting patterns\", \"type\": \"Application\"},\n {\"name\": \"Green parts\", \"type\": \"Application\"},\n {\"name\": \"Molds and cores\", \"type\": \"Application\"},\n {\"name\": \"Tools for injection molding\", \"type\": \"Application\"},\n {\"name\": \"Tools for injection molds\", \"type\": \"Application\"},\n {\"name\": \"Support parts\", \"type\": \"Application\"},\n {\"name\": \"Lost wax casting\", \"type\": \"Application\"},\n {\"name\": \"Jewelry\", \"type\": \"Application\"},\n {\"name\": \"Dental\", \"type\": \"Application\"},\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"Eden 1\", \"type\": \"Product\"},\n {\"name\": \"Connex 2 Materials\", \"type\": \"Product\"},\n {\"name\": \"Connex 1/2/3\", \"type\": \"Product\"},\n {\"name\": \"J750\", \"type\": \"Product\"},\n {\"name\": \"Module 2\", \"type\": \"Course Module\"},\n 
{\"name\": \"Module 3\", \"type\": \"Course Module\"},\n {\"name\": \"Module 4\", \"type\": \"Course Module\"}\n ],\n \"edges\": [\n {\"subject\": \"ASTM\", \"predicate\": \"defines\", \"object\": \"Material Extrusion\"},\n {\"subject\": \"ASTM\", \"predicate\": \"defines\", \"object\": \"Vat Photopolymerization\"},\n {\"subject\": \"ASTM\", \"predicate\": \"defines\", \"object\": \"Powder Bed Fusion\"},\n {\"subject\": \"ASTM\", \"predicate\": \"defines\", \"obje",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"technology_domain\"},\n {\"name\": \"3D Printing\", \"type\": \"technology_domain\"},\n {\"name\": \"American Society for Testing Materials\", \"type\": \"organization\"},\n {\"name\": \"ASTM\", \"type\": \"organization\"},\n {\"name\": \"Material Extrusion\", \"type\": \"AM_process\"},\n {\"name\": \"FDM\", \"type\": \"AM_technology\"},\n {\"name\": \"Fused Deposition Modeling\", \"type\": \"AM_technology\"},\n {\"name\": \"FFF\", \"type\": \"AM_technology\"},\n {\"name\": \"Fused Filament Fabrication\", \"type\": \"AM_technology\"},\n {\"name\": \"Vat Photopolymerization\", \"type\": \"AM_process\"},\n {\"name\": \"SL\", \"type\": \"AM_technology\"},\n {\"name\": \"SLA\", \"type\": \"AM_technology\"},\n {\"name\": \"Stereolithography\", \"type\": \"AM_technology\"},\n {\"name\": \"DLP\", \"type\": \"AM_technology\"},\n {\"name\": \"Digital Light Processing\", \"type\": \"AM_technology\"},\n {\"name\": \"3SP\", \"type\": \"AM_technology\"},\n {\"name\": \"Scan, Spin, & Selectively Photocure\", \"type\": \"AM_technology\"},\n {\"name\": \"MultiJet Printing\", \"type\": \"AM_technology\"},\n {\"name\": \"Powder Bed Fusion\", \"type\": \"AM_process\"},\n {\"name\": \"SLS\", \"type\": \"AM_technology\"},\n {\"name\": \"Selective Laser Sintering\", \"type\": \"AM_technology\"},\n {\"name\": \"DMLS\", \"type\": \"AM_technology\"},\n {\"name\": \"Direct Metal Laser Sintering\", \"type\": \"AM_technology\"},\n {\"name\": \"EBM\", \"type\": \"AM_technology\"},\n {\"name\": \"Electron Beam Melting\", \"type\": \"AM_technology\"},\n {\"name\": \"SHS\", \"type\": \"AM_technology\"},\n {\"name\": \"Selective Heat Sintering\", \"type\": \"AM_technology\"},\n {\"name\": \"Binder Jetting\", \"type\": \"AM_process\"},\n {\"name\": \"CJP\", \"type\": \"AM_technology\"},\n {\"name\": \"ColorJet Printing\", \"type\": \"AM_technology\"},\n {\"name\": \"PP\", \"type\": 
\"AM_technology\"},\n {\"name\": \"Plaster-based 3D Printing\", \"type\": \"AM_technology\"},\n {\"name\": \"Sheet Lamination\", \"type\": \"AM_process\"},\n {\"name\": \"UC\", \"type\": \"AM_technology\"},\n {\"name\": \"Ultrasonic Consolidation\", \"type\": \"AM_technology\"},\n {\"name\": \"LOM\", \"type\": \"AM_technology\"},\n {\"name\": \"Laminated Object Manufacturing\", \"type\": \"AM_technology\"},\n {\"name\": \"Directed Energy Deposition\", \"type\": \"AM_process\"},\n {\"name\": \"LMD\", \"type\": \"AM_technology\"},\n {\"name\": \"Laser Metal Deposition\", \"type\": \"AM_technology\"},\n {\"name\": \"Material Jetting\", \"type\": \"AM_process\"},\n {\"name\": \"MJP\", \"type\": \"AM_technology\"},\n {\"name\": \"PJ\", \"type\": \"AM_technology\"},\n {\"name\": \"PolyJet\", \"type\": \"AM_technology\"},\n {\"name\": \"Photopolymer Jetting\", \"type\": \"AM_technology\"},\n {\"name\": \"LM\", \"type\": \"AM_technology\"},\n {\"name\": \"Laser Melting\", \"type\": \"AM_technology\"},\n {\"name\": \"SLM\", \"type\": \"AM_technology\"},\n {\"name\": \"Selective Laser Melting\", \"type\": \"AM_technology\"},\n {\"name\": \"BJ\", \"type\": \"AM_technology\"},\n {\"name\": \"MJ\", \"type\": \"AM_technology\"},\n {\"name\": \"photopolymers\", \"type\": \"material\"},\n {\"name\": \"plastics\", \"type\": \"material\"},\n {\"name\": \"standard plastics\", \"type\": \"material\"},\n {\"name\": \"Nylon\", \"type\": \"material\"},\n {\"name\": \"metals\", \"type\": \"material\"},\n {\"name\": \"standard metals\", \"type\": \"material\"},\n {\"name\": \"thermoplastics\", \"type\": \"material\"},\n {\"name\": \"powder\", \"type\": \"material\"},\n {\"name\": \"wax-like materials\", \"type\": \"material\"},\n {\"name\": \"UV-active photopolymers\", \"type\": \"material\"},\n {\"name\": \"thermoset\", \"type\": \"material\"},\n {\"name\": \"anisotropy\", \"type\": \"characteristic\"},\n {\"name\": \"z-direction\", \"type\": \"characteristic\"},\n {\"name\": \"vertical 
direction\", \"type\": \"characteristic\"},\n {\"name\": \"step structure\", \"type\": \"characteristic\"},\n {\"name\": \"mechanical properties\", \"type\": \"characteristic\"},\n {\"name\": \"durability\", \"type\": \"characteristic\"},\n {\"name\": \"accuracy\", \"type\": \"characteristic\"},\n {\"name\": \"surface finishes\", \"type\": \"characteristic\"},\n {\"name\": \"build volume\", \"type\": \"characteristic\"},\n {\"name\": \"tolerances\", \"type\": \"charact",
"metadata_b": {
"language": "en",
"char_length": 3068,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "Document discusses seven key Additive Manufacturing technologies and their characteristics"
}
},
{
"source": "CAD I Syllabus.docx",
"size_bucket": "large",
"doc_chars": 12000,
"metrics_a": {
"n_entities": 23,
"n_edges": 24,
"predicate_diversity": 24,
"type_diversity": 18,
"avg_degree": 2.09,
"largest_component": 16,
"largest_component_pct": 69.6
},
"metrics_b": {
"n_entities": 45,
"n_edges": 46,
"predicate_diversity": 32,
"type_diversity": 23,
"avg_degree": 2.04,
"largest_component": 27,
"largest_component_pct": 60.0
},
"extraction_a": "```json\n{\n \"entities\": [\n {\"name\": \"Computer Aided Design I\", \"type\": \"Course\"},\n {\"name\": \"DDF 205-01\", \"type\": \"Course Code\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Instructor\"},\n {\"name\": \"Spring 2019\", \"type\": \"Semester\"},\n {\"name\": \"SAB 100\", \"type\": \"Location\"},\n {\"name\": \"HVMAC\", \"type\": \"Building\"},\n {\"name\": \"Rhino3D\", \"type\": \"Software\"},\n {\"name\": \"NURBS geometry\", \"type\": \"Concept\"},\n {\"name\": \"3D printing\", \"type\": \"Technology\"},\n {\"name\": \"Digital fabrication\", \"type\": \"Skill\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"Technology\"},\n {\"name\": \"Rhino 5 Level 1 Training Guide\", \"type\": \"Textbook\"},\n {\"name\": \"Blackboard\", \"type\": \"Platform\"},\n {\"name\": \"Academic Integrity\", \"type\": \"Policy\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"Jean Vizvary\", \"type\": \"Person\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"Assessment\"},\n {\"name\": \"April 2nd\", \"type\": \"Date\"},\n {\"name\": \"April 24-May 8\", \"type\": \"Date Range\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Institution\"}\n ],\n \"edges\": [\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"has course code\", \"object\": \"DDF 205-01\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"is taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"is offered in\", \"object\": \"Spring 2019\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"meets in\", \"object\": \"SAB 100\"},\n {\"subject\": \"SAB 100\", \"predicate\": \"is located in\", \"object\": \"HVMAC\"},\n {\"subject\": 
\"Computer Aided Design I\", \"predicate\": \"teaches software\", \"object\": \"Rhino3D\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"covers concept\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"uses technology\", \"object\": \"3D printing\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"develops skill\", \"object\": \"Digital fabrication\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"covers technology\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"uses textbook\", \"object\": \"Rhino 5 Level 1 Training Guide\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"requires submission on\", \"object\": \"Blackboard\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has office hours\", \"object\": \"M-R 10am \u2013 12pm\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"email is\", \"object\": \"nelsona@newpaltz.edu\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"enforces policy\", \"object\": \"Academic Integrity\"},\n {\"subject\": \"Students\", \"predicate\": \"can contact for accommodations\", \"object\": \"Disability Resource Center\"},\n {\"subject\": \"Deanna Knapp\", \"predicate\": \"is Assistant Director of\", \"object\": \"Disability Resource Center\"},\n {\"subject\": \"Jean Vizvary\", \"predicate\": \"is Director of\", \"object\": \"Disability Resource Center\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"is part of\", \"object\": \"SUNY New Paltz\"},\n {\"subject\": \"Students\", \"predicate\": \"can contact for veteran services\", \"object\": \"Office of Veteran and Military Services\"},\n {\"subject\": \"Students\", \"predicate\": \"must complete\", \"object\": \"Student Evaluation of Instruction\"},\n {\"subject\": \"Students\", \"predicate\": \"can drop course without penalty by\", \"object\": \"April 2nd\"},\n {\"subject\": \"Student 
Evaluation of Instruction\", \"predicate\": \"administered during\", \"object\": \"April 24-May 8\"},\n {\"subject\": \"Sojourner Truth Library\", \"predicate\": \"provides resources on\", \"object\": \"Academic Integrity\"}\n ]\n}\n```",
"extraction_b": "```json\n{\n \"entities\": [\n {\"name\": \"Computer Aided Design I\", \"type\": \"Course\"},\n {\"name\": \"DDF 205-01\", \"type\": \"Course Code\"},\n {\"name\": \"Spring 2019\", \"type\": \"Semester\"},\n {\"name\": \"Aaron Nelson\", \"type\": \"Person\"},\n {\"name\": \"SAB 100\", \"type\": \"Location\"},\n {\"name\": \"HVMAC\", \"type\": \"Location\"},\n {\"name\": \"nelsona@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"Rhino3D\", \"type\": \"Software\"},\n {\"name\": \"NURBS geometry\", \"type\": \"Technical Concept\"},\n {\"name\": \"3D printing\", \"type\": \"Technology\"},\n {\"name\": \"Digital fabrication\", \"type\": \"Technical Concept\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"Technology\"},\n {\"name\": \"Rhino 5 Level 1 Training Guide\", \"type\": \"Text\"},\n {\"name\": \"Blackboard\", \"type\": \"Platform\"},\n {\"name\": \"April 2nd\", \"type\": \"Date\"},\n {\"name\": \"Attendance Policy\", \"type\": \"Policy\"},\n {\"name\": \"Academic Integrity Policy\", \"type\": \"Policy\"},\n {\"name\": \"Disability Resource Center\", \"type\": \"Institution\"},\n {\"name\": \"Student Union\", \"type\": \"Location\"},\n {\"name\": \"Room 210\", \"type\": \"Location\"},\n {\"name\": \"Deanna Knapp\", \"type\": \"Person\"},\n {\"name\": \"knappd@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"Jean Vizvary\", \"type\": \"Person\"},\n {\"name\": \"vizvaryj@newpaltz.edu\", \"type\": \"Email\"},\n {\"name\": \"New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"SUNY New Paltz\", \"type\": \"Institution\"},\n {\"name\": \"Sojourner Truth Library\", \"type\": \"Institution\"},\n {\"name\": \"Office of Veteran and Military Services\", \"type\": \"Institution\"},\n {\"name\": \"Student Evaluation of Instruction\", \"type\": \"Assessment\"},\n {\"name\": \"April 24-May 8\", \"type\": \"Date Range\"},\n {\"name\": \"Grade A\", \"type\": \"Grade\"},\n {\"name\": \"Grade B\", \"type\": \"Grade\"},\n {\"name\": \"Grade C\", 
\"type\": \"Grade\"},\n {\"name\": \"Grade D\", \"type\": \"Grade\"},\n {\"name\": \"Grade F\", \"type\": \"Grade\"},\n {\"name\": \"3 absences\", \"type\": \"Policy Rule\"},\n {\"name\": \"10% deduction per day\", \"type\": \"Penalty Rule\"},\n {\"name\": \"CAD software\", \"type\": \"Software\"},\n {\"name\": \"Homework\", \"type\": \"Assignment Type\"},\n {\"name\": \"Projects\", \"type\": \"Assignment Type\"},\n {\"name\": \"Accommodation Memo\", \"type\": \"Document\"},\n {\"name\": \"Acceptable Uses and Privacy Policy\", \"type\": \"Policy\"},\n {\"name\": \"M-R 10am \u2013 12pm\", \"type\": \"Time Slot\"},\n {\"name\": \"T-F 9:30 \u2013 10:45 am\", \"type\": \"Time Slot\"},\n {\"name\": \"3 Credit Hours\", \"type\": \"Credit Value\"}\n ],\n \"edges\": [\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"has course code\", \"object\": \"DDF 205-01\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"offered in semester\", \"object\": \"Spring 2019\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"taught by\", \"object\": \"Aaron Nelson\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"meets at location\", \"object\": \"SAB 100\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"meets at location\", \"object\": \"HVMAC\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"meets during time\", \"object\": \"T-F 9:30 \u2013 10:45 am\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"has credit hours\", \"object\": \"3 Credit Hours\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has email\", \"object\": \"nelsona@newpaltz.edu\"},\n {\"subject\": \"Aaron Nelson\", \"predicate\": \"has office hours\", \"object\": \"M-R 10am \u2013 12pm\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"introduces\", \"object\": \"3D computer aided design drawing\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"introduces\", \"object\": \"rapid 
manufacturing\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"uses software\", \"object\": \"Rhino3D\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"covers concept\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"covers technology\", \"object\": \"3D printing\"},\n {\"subject\": \"Computer Aided Design I\", \"predicate\": \"covers technology\", \"object\": \"Desktop FDM printing\"},\n",
"metadata_b": {
"language": "en",
"char_length": 1024,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document outlines a Computer Aided Design course for Spring 2019, including objectives, texts, grading policy, attendance policy, academic integrity policy, accommodation policy, and computer services policy."
}
}
]
}
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,114 @@
{
"results": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"bucket": "high",
"a_entities": 30,
"b_entities": 27,
"a_edges": 30,
"b_edges": 27,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"bucket": "high",
"a_entities": 29,
"b_entities": 28,
"a_edges": 29,
"b_edges": 28,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "Claude: SUNY school closure risk and New Paltz",
"bucket": "high",
"a_entities": 28,
"b_entities": 25,
"a_edges": 28,
"b_edges": 26,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "ChatGPT: Sanity CMS overview",
"bucket": "mid",
"a_entities": 10,
"b_entities": 13,
"a_edges": 11,
"b_edges": 14,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "ChatGPT: Refactor Balance Logic.",
"bucket": "mid",
"a_entities": 9,
"b_entities": 13,
"a_edges": 9,
"b_edges": 13,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"bucket": "mid",
"a_entities": 9,
"b_entities": 6,
"a_edges": 9,
"b_edges": 6,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "ChatGPT: Python __name__ Main Explanation",
"bucket": "low",
"a_entities": 3,
"b_entities": 3,
"a_edges": 3,
"b_edges": 3,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "ChatGPT: Push changes to repo",
"bucket": "low",
"a_entities": 3,
"b_entities": 3,
"a_edges": 3,
"b_edges": 3,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "Wearable Marquees uw4.pptx",
"bucket": "document",
"a_entities": 13,
"b_entities": 15,
"a_edges": 13,
"b_edges": 15,
"a_rel_types": 2,
"b_rel_types": 2
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.docx",
"bucket": "document",
"a_entities": 13,
"b_entities": 17,
"a_edges": 13,
"b_edges": 17,
"a_rel_types": 2,
"b_rel_types": 2
}
],
"aggregate": {
"a_entities_total": 147,
"b_entities_total": 150,
"a_edges_total": 148,
"b_edges_total": 152,
"global_predicate_diversity": {
"a": 2,
"b": 2
}
}
}
@@ -0,0 +1,90 @@
{
"per_source": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"bucket": "high",
"a_edges": 28,
"a_preds": 16,
"b_edges": 25,
"b_preds": 15
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"bucket": "high",
"a_edges": 16,
"a_preds": 11,
"b_edges": 21,
"b_preds": 15
},
{
"name": "Claude: SUNY school closure risk and New Paltz",
"bucket": "high",
"a_edges": 23,
"a_preds": 15,
"b_edges": 16,
"b_preds": 12
},
{
"name": "ChatGPT: Sanity CMS overview",
"bucket": "mid",
"a_edges": 9,
"a_preds": 8,
"b_edges": 8,
"b_preds": 7
},
{
"name": "ChatGPT: Refactor Balance Logic.",
"bucket": "mid",
"a_edges": 1,
"a_preds": 1,
"b_edges": 9,
"b_preds": 8
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"bucket": "mid",
"a_edges": 7,
"a_preds": 4,
"b_edges": 7,
"b_preds": 4
},
{
"name": "ChatGPT: Python __name__ Main Explanation",
"bucket": "low",
"a_edges": 2,
"a_preds": 2,
"b_edges": 5,
"b_preds": 2
},
{
"name": "ChatGPT: Push changes to repo",
"bucket": "low",
"a_edges": 2,
"a_preds": 2,
"b_edges": 2,
"b_preds": 2
},
{
"name": "Wearable Marquees uw4.pptx",
"bucket": "document",
"a_edges": 12,
"a_preds": 6,
"b_edges": 10,
"b_preds": 10
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.docx",
"bucket": "document",
"a_edges": 9,
"a_preds": 5,
"b_edges": 13,
"b_preds": 10
}
],
"aggregate": {
"a_preds": 70,
"b_preds": 85,
"a_edges": 109,
"b_edges": 116
}
}
+344
View File
@@ -0,0 +1,344 @@
{
"results": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"bucket": "high",
"tier1_entities": 30,
"doc_chars": 25254,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "personal",
"one_sentence_summary": "Discussion on redesigning a personal website for an artist and designer focusing on portfolio and contact form"
},
"metadata_elapsed_s": 128.0,
"custom_extraction_instructions": "This is a personal document in prose format. Summary: Discussion on redesigning a personal website for an artist and designer focusing on portfolio and contact form This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 24.9,
"submit_result": {
"ok": true
}
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"bucket": "high",
"tier1_entities": 29,
"doc_chars": 10053,
"metadata": {
"language": "en",
"char_length": 10053,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Preparation for a dinner meeting with Jim Agutter to discuss his background, roles, research focus, projects, awards, and strategies for discussing contribution to the broader mission and interdisciplinary partnerships at his institution."
},
"metadata_elapsed_s": 96.7,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: Preparation for a dinner meeting with Jim Agutter to discuss his background, roles, research focus, projects, awards, and strategies for discussing contribution to the broader mission and interdisciplinary partnerships at his institution. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 37.0,
"submit_result": {
"ok": true
}
},
{
"name": "Claude: SUNY school closure risk and New Paltz",
"bucket": "high",
"tier1_entities": 28,
"doc_chars": 25937,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Discussion about the risk of closure for a specific program at SUNY New Paltz and the potential impact on the program director's career"
},
"metadata_elapsed_s": 119.0,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: Discussion about the risk of closure for a specific program at SUNY New Paltz and the potential impact on the program director's career This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 27.4,
"submit_result": {
"ok": true
}
},
{
"name": "ChatGPT: Sanity CMS overview",
"bucket": "mid",
"tier1_entities": 10,
"doc_chars": 14183,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Discussion about Sanity CMS, its functionality, and how it is used with Next.js for content management."
},
"metadata_elapsed_s": 146.0,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: Discussion about Sanity CMS, its functionality, and how it is used with Next.js for content management. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 20.8,
"submit_result": {
"ok": true
}
},
{
"name": "ChatGPT: Refactor Balance Logic.",
"bucket": "mid",
"tier1_entities": 9,
"doc_chars": 145479,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "The document discusses a MedianMaintainingHeap class in Python for maintaining a running median in a stream of data using two heaps, a max heap and a min heap."
},
"metadata_elapsed_s": 143.1,
"custom_extraction_instructions": "This is a technical document in prose format. Summary: The document discusses a MedianMaintainingHeap class in Python for maintaining a running median in a stream of data using two heaps, a max heap and a min heap. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 18.1,
"submit_result": {
"ok": true
}
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"bucket": "mid",
"tier1_entities": 9,
"doc_chars": 15729,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Document explains the process of buying a home in New York without a realtor, outlining steps such as financing, scheduling showings, conducting comparative market analysis, making an offer, hiring a real estate attorney, getting a home inspection, finalizing mortgage, and closing."
},
"metadata_elapsed_s": 118.9,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: Document explains the process of buying a home in New York without a realtor, outlining steps such as financing, scheduling showings, conducting comparative market analysis, making an offer, hiring a real estate attorney, getting a home inspection, finalizing mortgage, and closing. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 18.6,
"submit_result": {
"ok": true
}
},
{
"name": "ChatGPT: Python __name__ Main Explanation",
"bucket": "low",
"tier1_entities": 3,
"doc_chars": 4328,
"metadata": {
"language": "en",
"char_length": 4328,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Discussion on Python script execution, dot product calculation, and usage of enumerate function in Python"
},
"metadata_elapsed_s": 53.8,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: Discussion on Python script execution, dot product calculation, and usage of enumerate function in Python This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 7.9,
"submit_result": {
"ok": true
}
},
{
"name": "ChatGPT: Push changes to repo",
"bucket": "low",
"tier1_entities": 3,
"doc_chars": 1323,
"metadata": {
"language": "en",
"char_length": 1323,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "A conversation on how to push changes made in a local development directory to a Git repository"
},
"metadata_elapsed_s": 24.5,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: A conversation on how to push changes made in a local development directory to a Git repository This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 7.2,
"submit_result": {
"ok": true
}
},
{
"name": "Wearable Marquees uw4.pptx",
"bucket": "document",
"tier1_entities": 13,
"doc_chars": 11408,
"metadata": {
"language": "en",
"char_length": 11408,
"primary_format": "code",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document is a code for a wearable marquee with the text 'Wearable Marquees UW \u2013 Stout September 2025' displayed using an Adafruit DotStar Matrix."
},
"metadata_elapsed_s": 152.3,
"custom_extraction_instructions": "This is a educational document in code format. Summary: This document is a code for a wearable marquee with the text 'Wearable Marquees UW \u2013 Stout September 2025' displayed using an Adafruit DotStar Matrix. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 20.0,
"submit_result": {
"ok": true
}
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.docx",
"bucket": "document",
"tier1_entities": 13,
"doc_chars": 17142,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document details an independent study focusing on a direct work experience at Dogwood Entertainment for DDF 795, a 3D modeling course."
},
"metadata_elapsed_s": 118.4,
"custom_extraction_instructions": "This is a educational document in prose format. Summary: This document details an independent study focusing on a direct work experience at Dogwood Entertainment for DDF 795, a 3D modeling course. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 26.3,
"submit_result": {
"ok": true
}
}
]
}
@@ -0,0 +1,354 @@
{
"results": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"bucket": "high",
"tier1_entities": 30,
"doc_chars": 25254,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "personal",
"one_sentence_summary": "Discussion on redesigning a personal website for a designer, focusing on portfolio and contact form"
},
"metadata_elapsed_s": 128.8,
"source_description": "This is a personal document in prose format. Summary: Discussion on redesigning a personal website for a designer, focusing on portfolio and contact form This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 34.2,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"bucket": "high",
"tier1_entities": 29,
"doc_chars": 10053,
"metadata": {
"language": "en",
"char_length": 10053,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "A conversation between the interviewee and Claude about preparing for a dinner with Jim Agutter, discussing his background, current roles, research focus, recent projects, awards, and strategies for their conversation"
},
"metadata_elapsed_s": 95.3,
"source_description": "This is a educational document in prose format. Summary: A conversation between the interviewee and Claude about preparing for a dinner with Jim Agutter, discussing his background, current roles, research focus, recent projects, awards, and strategies for their conversation This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 48.1,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "Claude: SUNY school closure risk and New Paltz",
"bucket": "high",
"tier1_entities": 28,
"doc_chars": 25937,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Discussion about the risk of program closure at SUNY New Paltz and the pros and cons of staying or leaving the institution"
},
"metadata_elapsed_s": 118.1,
"source_description": "This is a educational document in prose format. Summary: Discussion about the risk of program closure at SUNY New Paltz and the pros and cons of staying or leaving the institution This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 44.4,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "ChatGPT: Sanity CMS overview",
"bucket": "mid",
"tier1_entities": 10,
"doc_chars": 14183,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document provides an overview of Sanity CMS, its functionality and features."
},
"metadata_elapsed_s": 143.4,
"source_description": "This is a educational document in prose format. Summary: This document provides an overview of Sanity CMS, its functionality and features. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 31.3,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "ChatGPT: Refactor Balance Logic.",
"bucket": "mid",
"tier1_entities": 9,
"doc_chars": 145479,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "A document discussing a MedianMaintainingHeap class for maintaining the median of a stream of data using two heaps"
},
"metadata_elapsed_s": 141.6,
"source_description": "This is a technical document in prose format. Summary: A document discussing a MedianMaintainingHeap class for maintaining the median of a stream of data using two heaps This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 22.8,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"bucket": "mid",
"tier1_entities": 9,
"doc_chars": 15729,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "A conversation discussing the process of buying a home in New York without a realtor and the role a realtor plays"
},
"metadata_elapsed_s": 115.4,
"source_description": "This is a educational document in prose format. Summary: A conversation discussing the process of buying a home in New York without a realtor and the role a realtor plays This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 15.1,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "ChatGPT: Python __name__ Main Explanation",
"bucket": "low",
"tier1_entities": 3,
"doc_chars": 4328,
"metadata": {
"language": "en",
"char_length": 4328,
"primary_format": "mixed",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": true,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "The document provides explanations about various topics in Python programming, including the __name__ check, vector dot product, and enumerate function."
},
"metadata_elapsed_s": 54.7,
"source_description": "This is a educational document in mixed format. Summary: The document provides explanations about various topics in Python programming, including the __name__ check, vector dot product, and enumerate function. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 16.8,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "ChatGPT: Push changes to repo",
"bucket": "low",
"tier1_entities": 3,
"doc_chars": 1323,
"metadata": {
"language": "en",
"char_length": 1323,
"primary_format": "prose",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "Instructions on how to push changes made locally in a Git repository."
},
"metadata_elapsed_s": 23.7,
"source_description": "This is a educational document in prose format. Summary: Instructions on how to push changes made locally in a Git repository. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 10.8,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "Wearable Marquees uw4.pptx",
"bucket": "document",
"tier1_entities": 13,
"doc_chars": 11408,
"metadata": {
"language": "en",
"char_length": 11408,
"primary_format": "code",
"structural_signals": {
"has_headings": false,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": true,
"has_dates": false
},
"content_signals": {
"has_named_people": false,
"has_institutional_language": false,
"has_technical_terminology": true,
"has_first_person": false,
"has_quotations": false
},
"domain_class": "technical",
"one_sentence_summary": "This document contains code for a wearable marquee display using Adafruit DotStar Matrix."
},
"metadata_elapsed_s": 150.0,
"source_description": "This is a technical document in code format. Summary: This document contains code for a wearable marquee display using Adafruit DotStar Matrix. This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 31.9,
"submit_result": {
"ok": true,
"count": 1
}
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.docx",
"bucket": "document",
"tier1_entities": 13,
"doc_chars": 17142,
"metadata": {
"language": "en",
"char_length": 12000,
"primary_format": "prose",
"structural_signals": {
"has_headings": true,
"has_bullet_lists": false,
"has_numbered_lists": false,
"has_tables": false,
"has_code_blocks": false,
"has_dates": true
},
"content_signals": {
"has_named_people": true,
"has_institutional_language": true,
"has_technical_terminology": true,
"has_first_person": true,
"has_quotations": false
},
"domain_class": "educational",
"one_sentence_summary": "This document is a course syllabus for an independent study in 3D modeling at New Paltz University in Fall 2023"
},
"metadata_elapsed_s": 118.0,
"source_description": "This is a educational document in prose format. Summary: This document is a course syllabus for an independent study in 3D modeling at New Paltz University in Fall 2023 This metadata is provided to orient your extraction, not to constrain it. Extract entities and relationships freely from the document text itself; the metadata is descriptive context, not a checklist.",
"submit_elapsed_s": 29.8,
"submit_result": {
"ok": true,
"count": 1
}
}
]
}
+61
View File
@@ -0,0 +1,61 @@
{
"metadata": {
"purpose": "E1 cascade re-extraction sample (n=10)",
"stratification": "density buckets + document subset",
"quartile_top": 19,
"quartile_bottom": 5,
"total_tier1_episodes": 250
},
"selected": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"entities": 30,
"bucket": "high"
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"entities": 29,
"bucket": "high"
},
{
"name": "Claude: SUNY school closure risk and New Paltz",
"entities": 28,
"bucket": "high"
},
{
"name": "ChatGPT: Sanity CMS overview",
"entities": 10,
"bucket": "mid"
},
{
"name": "ChatGPT: Refactor Balance Logic.",
"entities": 9,
"bucket": "mid"
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"entities": 9,
"bucket": "mid"
},
{
"name": "ChatGPT: Python __name__ Main Explanation",
"entities": 3,
"bucket": "low"
},
{
"name": "ChatGPT: Push changes to repo",
"entities": 3,
"bucket": "low"
},
{
"name": "Wearable Marquees uw4.pptx",
"entities": 13,
"bucket": "document"
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.docx",
"entities": 13,
"bucket": "document"
}
]
}
+736
View File
@@ -0,0 +1,736 @@
{
"experiment": "cascade_test",
"title": "Nodes-vs-Edges Cascade Experiment",
"started_at": "2026-04-28T05:13:36.125237+00:00",
"completed_at": "2026-04-28T05:24:53.344615+00:00",
"haiku_model": "claude-haiku-4-5-20251001",
"haiku_temperature": 0.0,
"haiku_max_tokens": 4096,
"local_model": "mistral",
"max_doc_chars": 8000,
"n_documents": 15,
"n_valid_pairs": 14,
"n_skipped": 1,
"total_elapsed_s": 677.2,
"totals": {
"a_input_tokens": 11015,
"a_output_tokens": 18803,
"b_input_tokens": 12685,
"b_output_tokens": 15809,
"a_cost_usd": 0.105,
"b_cost_usd": 0.0917,
"cost_delta_usd": -0.0133,
"cost_delta_pct": -12.66,
"note": "API cost only \u2014 local Mistral runtime on VPS not monetized"
},
"by_size_bucket": {
"small": {
"n": 5,
"a_input_tokens": 1236,
"a_output_tokens": 2788,
"b_input_tokens": 1599,
"b_output_tokens": 2815,
"input_delta_pct": 29.37,
"output_delta_pct": 0.97,
"a_avg_edges": 14,
"b_avg_edges": 13.6
},
"medium": {
"n": 6,
"a_input_tokens": 3595,
"a_output_tokens": 8477,
"b_input_tokens": 4295,
"b_output_tokens": 6153,
"input_delta_pct": 19.47,
"output_delta_pct": -27.42,
"a_avg_edges": 32.5,
"b_avg_edges": 24
},
"large": {
"n": 3,
"a_input_tokens": 6184,
"a_output_tokens": 7538,
"b_input_tokens": 6791,
"b_output_tokens": 6841,
"input_delta_pct": 9.82,
"output_delta_pct": -9.25,
"a_avg_edges": 40,
"b_avg_edges": 32
}
},
"results": [
{
"source": "02_2D Geometry.docx",
"size_bucket": "small",
"doc_chars_original": 188,
"doc_chars_sent": 188,
"truncated": false,
"condition_a": {
"input_tokens": 207,
"output_tokens": 718,
"latency_s": 3.21,
"entity_count": 10,
"edge_count": 21,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"2D geometry\", \"type\": \"geometric concept\"},\n {\"name\": \"Curves\", \"type\": \"geometric element\"},\n {\"name\": \"lines\", \"type\": \"geometric element\"},\n {\"name\": \"Poly lines\", \"type\": \"geometric element\"},\n {\"name\": \"planar\", \"type\": \"geometric property\"},\n {\"name\": \"non-planar\", \"type\": \"geometric property\"},\n {\"name\": \"open\", \"type\": \"geometric property\"},\n {\"name\": \"closed\", \"type\": \"geometric property\"},\n {\"name\": \"complex forms\", \"type\": \"geometric concept\"},\n {\"name\": \"geometries\", \"type\": \"geometric concept\"}\n ],\n \"edges\": [\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Curves\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"lines\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Poly lines\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Curves\", \"predicate\": \"are 
building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Curves\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 2.73,
"local_candidates": [
"BASICS",
"Curves",
"lines",
"Poly lines",
"Planar",
"non-planar",
"open",
"closed"
],
"local_raw": "{\n \"candidates\": [\"BASICS\", \"Curves\", \"lines\", \"Poly lines\", \"Planar\", \"non-planar\", \"open\", \"closed\"]\n}",
"api_input_tokens": 278,
"api_output_tokens": 735,
"api_latency_s": 4.31,
"entity_count": 11,
"edge_count": 21,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"BASICS\", \"type\": \"concept\"},\n {\"name\": \"2D geometry\", \"type\": \"geometric_domain\"},\n {\"name\": \"Curves\", \"type\": \"geometric_element\"},\n {\"name\": \"lines\", \"type\": \"geometric_element\"},\n {\"name\": \"Poly lines\", \"type\": \"geometric_element\"},\n {\"name\": \"planar\", \"type\": \"property\"},\n {\"name\": \"non-planar\", \"type\": \"property\"},\n {\"name\": \"open\", \"type\": \"property\"},\n {\"name\": \"closed\", \"type\": \"property\"},\n {\"name\": \"complex forms\", \"type\": \"geometric_construct\"},\n {\"name\": \"geometries\", \"type\": \"geometric_construct\"}\n ],\n \"edges\": [\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Curves\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"lines\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Poly lines\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Curves\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"non-planar\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"open\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"can be\", \"object\": \"closed\"},\n {\"subject\": \"Curves\", \"predicate\": 
\"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"complex forms\"},\n {\"subject\": \"Curves\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"},\n {\"subject\": \"Poly lines\", \"predicate\": \"are building blocks to produce\", \"object\": \"geometries\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "03_2D Transformation and Deformation.docx",
"size_bucket": "small",
"doc_chars_original": 418,
"doc_chars_sent": 418,
"truncated": false,
"condition_a": {
"input_tokens": 245,
"output_tokens": 471,
"latency_s": 2.41,
"entity_count": 10,
"edge_count": 11,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation of geometry\", \"type\": \"Concept\"},\n {\"name\": \"Transformations\", \"type\": \"Category\"},\n {\"name\": \"Deformations\", \"type\": \"Category\"},\n {\"name\": \"proportions\", \"type\": \"Property\"},\n {\"name\": \"relative geometries\", \"type\": \"Property\"},\n {\"name\": \"curves\", \"type\": \"Object\"},\n {\"name\": \"objects\", \"type\": \"Object\"},\n {\"name\": \"space\", \"type\": \"Context\"},\n {\"name\": \"angles\", \"type\": \"Property\"},\n {\"name\": \"curvature\", \"type\": \"Property\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"relative geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change\", \"object\": \"relationship to space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"are changes made to\", \"object\": \"existing geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"curvature\"},\n {\"subject\": \"proportions\", \"predicate\": \"belong to\", \"object\": \"curves\"},\n {\"subject\": \"proportions\", \"predicate\": \"belong to\", \"object\": \"objects\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 4.36,
"local_candidates": [
"Manipulation",
"Geometry",
"Transformations",
"Deformations",
"curves",
"objects",
"space"
],
"local_raw": "{\n \"candidates\": [\"Manipulation\", \"Geometry\", \"Transformations\", \"Deformations\", \"curves\", \"objects\", \"space\"]\n}",
"api_input_tokens": 311,
"api_output_tokens": 552,
"api_latency_s": 2.81,
"entity_count": 11,
"edge_count": 13,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation\", \"type\": \"Concept\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"Transformations\", \"type\": \"Category\"},\n {\"name\": \"Deformations\", \"type\": \"Category\"},\n {\"name\": \"Curves\", \"type\": \"Geometric Element\"},\n {\"name\": \"Objects\", \"type\": \"Geometric Element\"},\n {\"name\": \"Space\", \"type\": \"Concept\"},\n {\"name\": \"Proportions\", \"type\": \"Property\"},\n {\"name\": \"Relative geometries\", \"type\": \"Property\"},\n {\"name\": \"Angles\", \"type\": \"Property\"},\n {\"name\": \"Curvature\", \"type\": \"Property\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation\", \"predicate\": \"can be broken down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation\", \"predicate\": \"can be broken down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"is a category of\", \"object\": \"Geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"is a category of\", \"object\": \"Geometry\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"Proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"Relative geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"apply to\", \"object\": \"Curves\"},\n {\"subject\": \"Transformations\", \"predicate\": \"apply to\", \"object\": \"Objects\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change relationship to\", \"object\": \"Space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"change\", \"object\": \"Proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"change\", \"object\": \"Angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"change\", \"object\": \"Curvature\"},\n {\"subject\": \"Deformations\", \"predicate\": \"apply to\", \"object\": \"Geometry\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "03_2D Transformation, Deformation, and Editing-AARON.docx",
"size_bucket": "small",
"doc_chars_original": 541,
"doc_chars_sent": 541,
"truncated": false,
"condition_a": {
"input_tokens": 271,
"output_tokens": 587,
"latency_s": 2.7,
"entity_count": 14,
"edge_count": 13,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation of geometry\", \"type\": \"Concept\"},\n {\"name\": \"Transformations\", \"type\": \"Category\"},\n {\"name\": \"Deformations\", \"type\": \"Category\"},\n {\"name\": \"proportions\", \"type\": \"Property\"},\n {\"name\": \"relative geometries\", \"type\": \"Property\"},\n {\"name\": \"curves\", \"type\": \"Geometric Element\"},\n {\"name\": \"objects\", \"type\": \"Geometric Element\"},\n {\"name\": \"space\", \"type\": \"Concept\"},\n {\"name\": \"angles\", \"type\": \"Property\"},\n {\"name\": \"curvature\", \"type\": \"Property\"},\n {\"name\": \"Editing Commands\", \"type\": \"Concept\"},\n {\"name\": \"geometry\", \"type\": \"Concept\"},\n {\"name\": \"sections\", \"type\": \"Geometric Element\"},\n {\"name\": \"form\", \"type\": \"Geometric Element\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation of geometry\", \"predicate\": \"can be broken down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"relative geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change\", \"object\": \"space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"are changes made to\", \"object\": \"geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"curvature\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"can edit\", \"object\": \"geometry\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"involves adding\", \"object\": \"sections\"},\n {\"subject\": \"Editing 
Commands\", \"predicate\": \"involves combining\", \"object\": \"sections\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"involves removing\", \"object\": \"sections\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 6.06,
"local_candidates": [
"Manipulation",
"Geometry",
"Transformations",
"Deformations",
"Curves",
"Objects",
"Space",
"Editing Commands",
"Adding",
"Combining",
"Removing"
],
"local_raw": "{\n \"candidates\": [\"Manipulation\", \"Geometry\", \"Transformations\", \"Deformations\", \"Curves\", \"Objects\", \"Space\", \"Editing Commands\", \"Adding\", \"Combining\", \"Removing\"]\n}",
"api_input_tokens": 354,
"api_output_tokens": 620,
"api_latency_s": 3.03,
"entity_count": 15,
"edge_count": 13,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Manipulation\", \"type\": \"Concept\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"Transformations\", \"type\": \"Category\"},\n {\"name\": \"Deformations\", \"type\": \"Category\"},\n {\"name\": \"Curves\", \"type\": \"Geometric Element\"},\n {\"name\": \"Objects\", \"type\": \"Geometric Element\"},\n {\"name\": \"Space\", \"type\": \"Concept\"},\n {\"name\": \"Proportions\", \"type\": \"Property\"},\n {\"name\": \"Relative Geometries\", \"type\": \"Property\"},\n {\"name\": \"Angles\", \"type\": \"Property\"},\n {\"name\": \"Curvature\", \"type\": \"Property\"},\n {\"name\": \"Editing Commands\", \"type\": \"Operation\"},\n {\"name\": \"Adding\", \"type\": \"Editing Operation\"},\n {\"name\": \"Combining\", \"type\": \"Editing Operation\"},\n {\"name\": \"Removing\", \"type\": \"Editing Operation\"}\n ],\n \"edges\": [\n {\"subject\": \"Manipulation\", \"predicate\": \"can be broken down into\", \"object\": \"Transformations\"},\n {\"subject\": \"Manipulation\", \"predicate\": \"can be broken down into\", \"object\": \"Deformations\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"Proportions\"},\n {\"subject\": \"Transformations\", \"predicate\": \"do not change\", \"object\": \"Relative Geometries\"},\n {\"subject\": \"Transformations\", \"predicate\": \"change\", \"object\": \"Space\"},\n {\"subject\": \"Deformations\", \"predicate\": \"are changes to\", \"object\": \"Geometry\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"Proportions\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"Angles\"},\n {\"subject\": \"Deformations\", \"predicate\": \"affect\", \"object\": \"Curvature\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"can edit\", \"object\": \"Geometry\"},\n {\"subject\": \"Adding\", \"predicate\": \"is a type of\", \"object\": \"Editing Commands\"},\n 
{\"subject\": \"Combining\", \"predicate\": \"is a type of\", \"object\": \"Editing Commands\"},\n {\"subject\": \"Removing\", \"predicate\": \"is a type of\", \"object\": \"Editing Commands\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "03_Editing Geometry.docx",
"size_bucket": "small",
"doc_chars_original": 171,
"doc_chars_sent": 171,
"truncated": false,
"condition_a": {
"input_tokens": 195,
"output_tokens": 320,
"latency_s": 5.21,
"entity_count": 7,
"edge_count": 8,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Editing Commands\", \"type\": \"Feature\"},\n {\"name\": \"geometry\", \"type\": \"Object\"},\n {\"name\": \"sections\", \"type\": \"Component\"},\n {\"name\": \"form\", \"type\": \"Structure\"},\n {\"name\": \"adding\", \"type\": \"Operation\"},\n {\"name\": \"combining\", \"type\": \"Operation\"},\n {\"name\": \"removing\", \"type\": \"Operation\"}\n ],\n \"edges\": [\n {\"subject\": \"Editing Commands\", \"predicate\": \"can be used to edit\", \"object\": \"geometry\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"adding\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"combining\"},\n {\"subject\": \"geometry\", \"predicate\": \"can be further edited by\", \"object\": \"removing\"},\n {\"subject\": \"adding\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"combining\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"removing\", \"predicate\": \"modifies\", \"object\": \"sections\"},\n {\"subject\": \"sections\", \"predicate\": \"are part of\", \"object\": \"form\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 3.16,
"local_candidates": [
"Editing",
"Commands",
"Geometry",
"Adding",
"Combining",
"Removing",
"Sections"
],
"local_raw": "{\n \"candidates\": [\"Editing\", \"Commands\", \"Geometry\", \"Adding\", \"Combining\", \"Removing\", \"Sections\"]\n}",
"api_input_tokens": 259,
"api_output_tokens": 289,
"api_latency_s": 1.69,
"entity_count": 6,
"edge_count": 7,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Editing Commands\", \"type\": \"Topic\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"Adding\", \"type\": \"Operation\"},\n {\"name\": \"Combining\", \"type\": \"Operation\"},\n {\"name\": \"Removing\", \"type\": \"Operation\"},\n {\"name\": \"Sections\", \"type\": \"Component\"}\n ],\n \"edges\": [\n {\"subject\": \"Geometry\", \"predicate\": \"can be edited through\", \"object\": \"Editing Commands\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"include\", \"object\": \"Adding\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"include\", \"object\": \"Combining\"},\n {\"subject\": \"Editing Commands\", \"predicate\": \"include\", \"object\": \"Removing\"},\n {\"subject\": \"Adding\", \"predicate\": \"operates on\", \"object\": \"Sections\"},\n {\"subject\": \"Combining\", \"predicate\": \"operates on\", \"object\": \"Sections\"},\n {\"subject\": \"Removing\", \"predicate\": \"operates on\", \"object\": \"Sections\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "04_Annotations.docx",
"size_bucket": "small",
"doc_chars_original": 737,
"doc_chars_sent": 737,
"truncated": false,
"condition_a": {
"input_tokens": 318,
"output_tokens": 692,
"latency_s": 3.44,
"entity_count": 17,
"edge_count": 17,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Annotations\", \"type\": \"Feature\"},\n {\"name\": \"drawing\", \"type\": \"Document\"},\n {\"name\": \"manufacturing\", \"type\": \"Process\"},\n {\"name\": \"3D viewing program\", \"type\": \"Software\"},\n {\"name\": \"Annotation Settings\", \"type\": \"Configuration\"},\n {\"name\": \"scale\", \"type\": \"Property\"},\n {\"name\": \"location\", \"type\": \"Property\"},\n {\"name\": \"ratio\", \"type\": \"Property\"},\n {\"name\": \"dimension\", \"type\": \"Element\"},\n {\"name\": \"object\", \"type\": \"Entity\"},\n {\"name\": \"Options\", \"type\": \"Menu\"},\n {\"name\": \"Document properties\", \"type\": \"Menu\"},\n {\"name\": \"Annotation Styles\", \"type\": \"Menu\"},\n {\"name\": \"icon\", \"type\": \"UI Element\"},\n {\"name\": \"pop up window\", \"type\": \"UI Element\"},\n {\"name\": \"Annotation style\", \"type\": \"Configuration\"},\n {\"name\": \"APPLY\", \"type\": \"Button\"}\n ],\n \"edges\": [\n {\"subject\": \"Annotations\", \"predicate\": \"communicate\", \"object\": \"technical elements\"},\n {\"subject\": \"Annotations\", \"predicate\": \"used for\", \"object\": \"drawing\"},\n {\"subject\": \"Annotations\", \"predicate\": \"help with\", \"object\": \"manufacturing\"},\n {\"subject\": \"Annotations\", \"predicate\": \"alternative to\", \"object\": \"3D viewing program\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"scale\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"location\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"ratio\"},\n {\"subject\": \"scale\", \"predicate\": \"applies to\", \"object\": \"dimension\"},\n {\"subject\": \"location\", \"predicate\": \"applies to\", \"object\": \"dimension\"},\n {\"subject\": \"ratio\", \"predicate\": \"applies to\", \"object\": \"dimension\"},\n {\"subject\": 
\"dimension\", \"predicate\": \"should be proportional to\", \"object\": \"object\"},\n {\"subject\": \"Options\", \"predicate\": \"contains\", \"object\": \"Document properties\"},\n {\"subject\": \"Document properties\", \"predicate\": \"contains\", \"object\": \"Annotation Styles\"},\n {\"subject\": \"icon\", \"predicate\": \"opens\", \"object\": \"pop up window\"},\n {\"subject\": \"pop up window\", \"predicate\": \"displays\", \"object\": \"Annotation style\"},\n {\"subject\": \"Annotation style\", \"predicate\": \"can be edited\", \"object\": \"Annotation Settings\"},\n {\"subject\": \"APPLY\", \"predicate\": \"confirms\", \"object\": \"changes\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 7.22,
"local_candidates": [
"Annotations",
"drawing",
"object",
"manufacturing",
"3D viewing program",
"Options",
"Document properties",
"Annotation Styles",
"icon",
"pop up window"
],
"local_raw": "{\n \"candidates\": [\"Annotations\", \"drawing\", \"object\", \"manufacturing\", \"3D viewing program\", \"Options\", \"Document properties\", \"Annotation Styles\", \"icon\", \"pop up window\"]\n}",
"api_input_tokens": 397,
"api_output_tokens": 619,
"api_latency_s": 3.23,
"entity_count": 16,
"edge_count": 14,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Annotations\", \"type\": \"Feature\"},\n {\"name\": \"drawing\", \"type\": \"Document\"},\n {\"name\": \"object\", \"type\": \"Item\"},\n {\"name\": \"manufacturing\", \"type\": \"Process\"},\n {\"name\": \"3D viewing program\", \"type\": \"Software\"},\n {\"name\": \"Annotation Settings\", \"type\": \"Configuration\"},\n {\"name\": \"scale\", \"type\": \"Property\"},\n {\"name\": \"location\", \"type\": \"Property\"},\n {\"name\": \"ratio\", \"type\": \"Property\"},\n {\"name\": \"dimension\", \"type\": \"Property\"},\n {\"name\": \"Options\", \"type\": \"Menu\"},\n {\"name\": \"Document properties\", \"type\": \"Menu\"},\n {\"name\": \"Annotation Styles\", \"type\": \"Menu\"},\n {\"name\": \"icon\", \"type\": \"UI Element\"},\n {\"name\": \"pop up window\", \"type\": \"UI Element\"},\n {\"name\": \"Annotation style\", \"type\": \"Configuration\"}\n ],\n \"edges\": [\n {\"subject\": \"Annotations\", \"predicate\": \"communicate\", \"object\": \"technical elements of a drawing\"},\n {\"subject\": \"Annotations\", \"predicate\": \"help communicate to\", \"object\": \"manufacturing\"},\n {\"subject\": \"Annotations\", \"predicate\": \"help communicate to\", \"object\": \"someone to review\"},\n {\"subject\": \"someone to review\", \"predicate\": \"may lack access to\", \"object\": \"3D viewing program\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"scale\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"location\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"ratio\"},\n {\"subject\": \"Annotation Settings\", \"predicate\": \"requires proper setup of\", \"object\": \"dimension\"},\n {\"subject\": \"Options\", \"predicate\": \"contains\", \"object\": \"Document properties\"},\n {\"subject\": \"Document properties\", \"predicate\": \"contains\", 
\"object\": \"Annotation Styles\"},\n {\"subject\": \"icon\", \"predicate\": \"provides access to\", \"object\": \"pop up window\"},\n {\"subject\": \"pop up window\", \"predicate\": \"displays\", \"object\": \"Annotation style\"},\n {\"subject\": \"user\", \"predicate\": \"clicks\", \"object\": \"APPLY\"},\n {\"subject\": \"APPLY\", \"predicate\": \"confirms\", \"object\": \"changes\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "00_Syllabus.docx",
"size_bucket": "medium",
"doc_chars_original": 2273,
"doc_chars_sent": 2273,
"truncated": false,
"condition_a": {
"input_tokens": 607,
"output_tokens": 1159,
"latency_s": 5.11,
"entity_count": 30,
"edge_count": 26,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"3D Computer Aided Design drawing\", \"type\": \"subject\"},\n {\"name\": \"rapid manufacturing\", \"type\": \"subject\"},\n {\"name\": \"Rhino3D\", \"type\": \"software\"},\n {\"name\": \"NURBS geometry\", \"type\": \"concept\"},\n {\"name\": \"3D printing\", \"type\": \"technology\"},\n {\"name\": \"CAD software\", \"type\": \"software\"},\n {\"name\": \"digital fabrication software\", \"type\": \"software\"},\n {\"name\": \"digital fabrication hardware\", \"type\": \"hardware\"},\n {\"name\": \"spatial awareness\", \"type\": \"skill\"},\n {\"name\": \"scales\", \"type\": \"concept\"},\n {\"name\": \"measuring devices\", \"type\": \"tool\"},\n {\"name\": \"physical dimensions\", \"type\": \"concept\"},\n {\"name\": \"virtual space\", \"type\": \"concept\"},\n {\"name\": \"three dimensional form\", \"type\": \"concept\"},\n {\"name\": \"digital modeling\", \"type\": \"skill\"},\n {\"name\": \"three dimensional design principals\", \"type\": \"concept\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"technology\"},\n {\"name\": \"printing process\", \"type\": \"process\"},\n {\"name\": \"tolerances\", \"type\": \"concept\"},\n {\"name\": \"critical thinking\", \"type\": \"skill\"},\n {\"name\": \"problem solving skills\", \"type\": \"skill\"},\n {\"name\": \"research methods\", \"type\": \"skill\"},\n {\"name\": \"Rhino Level 1 Training Guide\", \"type\": \"material\"},\n {\"name\": \"assignments\", \"type\": \"assessment\"},\n {\"name\": \"grade\", \"type\": \"assessment\"},\n {\"name\": \"A grade\", \"type\": \"grade\"},\n {\"name\": \"B grade\", \"type\": \"grade\"},\n {\"name\": \"C grade\", \"type\": \"grade\"},\n {\"name\": \"D grade\", \"type\": \"grade\"},\n {\"name\": \"F grade\", \"type\": \"grade\"}\n ],\n \"edges\": [\n {\"subject\": \"course\", \"predicate\": \"introduces\", \"object\": \"3D Computer Aided Design drawing\"},\n {\"subject\": \"course\", \"predicate\": \"introduces\", 
\"object\": \"rapid manufacturing\"},\n {\"subject\": \"course\", \"predicate\": \"uses\", \"object\": \"Rhino3D\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"virtual space\"},\n {\"subject\": \"students\", \"predicate\": \"become acquainted with\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"students\", \"predicate\": \"gain hands on technical skills with\", \"object\": \"digital fabrication software\"},\n {\"subject\": \"students\", \"predicate\": \"gain hands on technical skills with\", \"object\": \"digital fabrication hardware\"},\n {\"subject\": \"students\", \"predicate\": \"demonstrate\", \"object\": \"spatial awareness\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"scales\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"measuring devices\"},\n {\"subject\": \"students\", \"predicate\": \"translate\", \"object\": \"physical dimensions into virtual space\"},\n {\"subject\": \"students\", \"predicate\": \"gain working knowledge of\", \"object\": \"three dimensional form\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"three dimensional design principals\"},\n {\"subject\": \"students\", \"predicate\": \"gain understanding of designing for\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"students\", \"predicate\": \"understand\", \"object\": \"printing process\"},\n {\"subject\": \"students\", \"predicate\": \"apply\", \"object\": \"tolerances\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"critical thinking\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"problem solving skills\"},\n {\"subject\": \"students\", \"predicate\": \"use\", \"object\": \"research methods\"},\n {\"subject\": \"course\", \"predicate\": \"requires\", \"object\": \"Rhino Level 1 Training Guide\"},\n {\"subject\": \"late assignments\", \"predicate\": \"result in grade reduction of\", \"object\": \"10% per 
day\"},\n {\"subject\": \"A grade\", \"predicate\": \"represents\", \"object\": \"Outstanding, excellent work\"},\n {\"subject\": \"B grade\", \"predicate\": \"represents\", \"object\": \"Good, competent work\"},\n {\"subject\": \"C grade\", \"predicate\": \"represents\", \"object\": \"Adequate, reasonably satisfactory work\"},\n {\"subject\": \"D grade\", \"predicate\": \"represents\", \"object\": \"Minimally acceptable work\"},\n {\"subject\": \"F grade\",",
"error": null
},
"condition_b": {
"local_latency_s": 16.34,
"local_candidates": [
"Rhino3D",
"CAD software",
"NURBS geometry",
"3D printing",
"Digital fabrication software",
"Digital fabrication hardware",
"Desktop FDM printing",
"Rhino Level 1 Training Guide"
],
"local_raw": "{\n \"candidates\": [\"Rhino3D\", \"CAD software\", \"NURBS geometry\", \"3D printing\", \"Digital fabrication software\", \"Digital fabrication hardware\", \"Desktop FDM printing\", \"Rhino Level 1 Training Guide\"]\n}",
"api_input_tokens": 699,
"api_output_tokens": 858,
"api_latency_s": 4.39,
"entity_count": 18,
"edge_count": 19,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino3D\", \"type\": \"CAD software\"},\n {\"name\": \"CAD software\", \"type\": \"software category\"},\n {\"name\": \"NURBS geometry\", \"type\": \"geometric modeling technique\"},\n {\"name\": \"3D printing\", \"type\": \"manufacturing technology\"},\n {\"name\": \"digital fabrication software\", \"type\": \"software category\"},\n {\"name\": \"digital fabrication hardware\", \"type\": \"hardware category\"},\n {\"name\": \"Desktop FDM printing\", \"type\": \"3D printing technology\"},\n {\"name\": \"Rhino Level 1 Training Guide\", \"type\": \"educational resource\"},\n {\"name\": \"3D Computer Aided Design\", \"type\": \"design discipline\"},\n {\"name\": \"rapid manufacturing\", \"type\": \"manufacturing approach\"},\n {\"name\": \"virtual space\", \"type\": \"digital environment\"},\n {\"name\": \"tangible objects\", \"type\": \"physical output\"},\n {\"name\": \"spatial awareness\", \"type\": \"skill\"},\n {\"name\": \"three dimensional form\", \"type\": \"design concept\"},\n {\"name\": \"digital modeling\", \"type\": \"design process\"},\n {\"name\": \"three dimensional design principals\", \"type\": \"design concept\"},\n {\"name\": \"file preparation\", \"type\": \"technical process\"},\n {\"name\": \"tolerances\", \"type\": \"technical specification\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino3D\", \"predicate\": \"is a type of\", \"object\": \"CAD software\"},\n {\"subject\": \"Rhino3D\", \"predicate\": \"supports\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"Rhino3D\", \"predicate\": \"enables\", \"object\": \"3D Computer Aided Design\"},\n {\"subject\": \"3D Computer Aided Design\", \"predicate\": \"uses\", \"object\": \"CAD software\"},\n {\"subject\": \"3D Computer Aided Design\", \"predicate\": \"involves\", \"object\": \"NURBS geometry\"},\n {\"subject\": \"3D Computer Aided Design\", \"predicate\": \"enables\", \"object\": \"rapid manufacturing\"},\n {\"subject\": \"rapid 
manufacturing\", \"predicate\": \"uses\", \"object\": \"3D printing\"},\n {\"subject\": \"3D printing\", \"predicate\": \"produces\", \"object\": \"tangible objects\"},\n {\"subject\": \"digital fabrication software\", \"predicate\": \"includes\", \"object\": \"Rhino3D\"},\n {\"subject\": \"Desktop FDM printing\", \"predicate\": \"is a type of\", \"object\": \"3D printing\"},\n {\"subject\": \"course\", \"predicate\": \"teaches\", \"object\": \"Rhino3D\"},\n {\"subject\": \"course\", \"predicate\": \"teaches\", \"object\": \"digital fabrication software\"},\n {\"subject\": \"course\", \"predicate\": \"teaches\", \"object\": \"digital fabrication hardware\"},\n {\"subject\": \"course\", \"predicate\": \"teaches\", \"object\": \"Desktop FDM printing\"},\n {\"subject\": \"Rhino Level 1 Training Guide\", \"predicate\": \"provides training for\", \"object\": \"Rhino3D\"},\n {\"subject\": \"students\", \"predicate\": \"learn\", \"object\": \"three dimensional design principals\"},\n {\"subject\": \"students\", \"predicate\": \"develop\", \"object\": \"spatial awareness\"},\n {\"subject\": \"file preparation\", \"predicate\": \"requires understanding of\", \"object\": \"tolerances\"},\n {\"subject\": \"Desktop FDM printing\", \"predicate\": \"has\", \"object\": \"tolerances\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "01_ALL_Overview of AM and 3DP_v3.pptx",
"size_bucket": "large",
"doc_chars_original": 5833,
"doc_chars_sent": 5833,
"truncated": false,
"condition_a": {
"input_tokens": 1579,
"output_tokens": 1602,
"latency_s": 7.65,
"entity_count": 34,
"edge_count": 34,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"3D Printing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Cutting\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Subtractive Manufacturing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Forming\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"ASTM International Committee F42\", \"type\": \"Organization\"},\n {\"name\": \"Wohler's Report 2014\", \"type\": \"Document\"},\n {\"name\": \"Laser Cutting\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Carving\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Drilling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Milling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Chiseling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Glass Blowing\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Vacuum Forming\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Hydroforming\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"PolyJet\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"FDM\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"Neolithic Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"First Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"Second Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"Digital Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"Third Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"Decentralized Manufacturing\", \"type\": \"Business Model\"},\n {\"name\": \"Economies of Scale\", \"type\": \"Economic Concept\"},\n {\"name\": \"Economies of Scope\", \"type\": \"Economic Concept\"},\n {\"name\": \"McKinsey & Company\", \"type\": \"Organization\"},\n {\"name\": \"Module 1\", \"type\": \"Educational Module\"},\n {\"name\": \"Freedom\", \"type\": 
\"Advantage\"},\n {\"name\": \"Closed System\", \"type\": \"Advantage\"},\n {\"name\": \"Quick Production\", \"type\": \"Advantage\"},\n {\"name\": \"Multiple Materials One Print\", \"type\": \"Advantage\"},\n {\"name\": \"Low-cost Manufacturing\", \"type\": \"Advantage\"},\n {\"name\": \"Real Thermoplastics\", \"type\": \"Material\"},\n {\"name\": \"Factory of Tomorrow\", \"type\": \"Concept\"}\n ],\n \"edges\": [\n {\"subject\": \"3D Printing\", \"predicate\": \"is a type of\", \"object\": \"Additive Manufacturing\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"is defined by\", \"object\": \"ASTM International Committee F42\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"is documented in\", \"object\": \"Wohler's Report 2014\"},\n {\"subject\": \"Cutting\", \"predicate\": \"is a\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Subtractive Manufacturing\", \"predicate\": \"is a\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Forming\", \"predicate\": \"is a\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Laser Cutting\", \"predicate\": \"is a type of\", \"object\": \"Cutting\"},\n {\"subject\": \"Carving\", \"predicate\": \"is a type of\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Drilling\", \"predicate\": \"is a type of\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Milling\", \"predicate\": \"is a type of\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Chiseling\", \"predicate\": \"is a type of\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Glass Blowing\", \"predicate\": \"is a type of\", \"object\": \"Forming\"},\n {\"subject\": \"Vacuum Forming\", \"predicate\": \"is a type of\", \"object\": \"Forming\"},\n {\"subject\": \"Hydroforming\", \"predicate\": \"is a type of\", \"object\": \"Forming\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"is a technology used in\", \"object\": \"Additive Manufacturing\"},\n 
{\"subject\": \"FDM\", \"predicate\": \"is a technology used in\", \"object\": \"Additive Manufacturing\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"has advantage of\", \"object\": \"Freedom\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"has advantage of\", \"object\": \"Closed System\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"has advantage of\", \"object\": \"Quic",
"error": null
},
"condition_b": {
"local_latency_s": 69.09,
"local_candidates": [
"ASTM international committee F42",
"Wohler's Report 2014",
"3D Printing",
"Additive Manufacturing",
"Cutting",
"Subtractive Manufacturing",
"Forming",
"Laser cutting",
"Yellow Green Orange fluorescent marker",
"photosteve101",
"Carving",
"Drilling",
"Milling",
"Chiseling",
"Vacuum Forming",
"Dennis van Zuijlekom",
"Hydroforming",
"Aven Orgnac Salle Sup",
"Benh LIEU SONG",
"PolyJet",
"FDM",
"Neolithic Revolution",
"1st Industrial Revolution",
"2nd Industrial Revolution",
"Digital Revolution",
"3rd Industrial Revolution",
"McKinsey & Company"
],
"local_raw": "{\n \"candidates\": [\n \"ASTM international committee F42\",\n \"Wohler's Report 2014\",\n \"3D Printing\",\n \"Additive Manufacturing\",\n \"Cutting\",\n \"Subtractive Manufacturing\",\n \"Forming\",\n \"Laser cutting\",\n \"Yellow Green Orange fluorescent marker\",\n \"photosteve101\",\n \"Carving\",\n \"Drilling\",\n \"Milling\",\n \"Chiseling\",\n \"Vacuum Forming\",\n \"Dennis van Zuijlekom\",\n \"Hydroforming\",\n \"Aven Orgnac Salle Sup\",\n \"Benh LIEU SONG\",\n \"PolyJet\",\n \"FDM\",\n \"Neolithic Revolution\",\n \"1st Industrial Revolution\",\n \"2nd Industrial Revolution\",\n \"Digital Revolution\",\n \"3rd Industrial Revolution\",\n \"McKinsey & Company\"\n ]\n}",
"api_input_tokens": 1798,
"api_output_tokens": 1354,
"api_latency_s": 6.58,
"entity_count": 26,
"edge_count": 29,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"3D Printing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Cutting\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Subtractive Manufacturing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Forming\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Laser cutting\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Carving\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Drilling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Milling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Chiseling\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Vacuum Forming\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Hydroforming\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"Glass Blowing\", \"type\": \"Manufacturing Technique\"},\n {\"name\": \"PolyJet\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"FDM\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"ASTM international committee F42\", \"type\": \"Standards Organization\"},\n {\"name\": \"Wohler's Report 2014\", \"type\": \"Publication\"},\n {\"name\": \"Neolithic Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"1st Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"2nd Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"Digital Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"3rd Industrial Revolution\", \"type\": \"Historical Period\"},\n {\"name\": \"McKinsey & Company\", \"type\": \"Organization\"},\n {\"name\": \"Dennis van Zuijlekom\", \"type\": \"Photographer\"},\n {\"name\": \"photosteve101\", \"type\": \"Photographer\"},\n {\"name\": \"Benh LIEU SONG\", \"type\": \"Photographer\"}\n ],\n \"edges\": [\n {\"subject\": \"3D Printing\", \"predicate\": \"is a type of\", \"object\": \"Additive 
Manufacturing\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"is defined by\", \"object\": \"ASTM international committee F42\"},\n {\"subject\": \"3D Printing\", \"predicate\": \"is defined by\", \"object\": \"ASTM international committee F42\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"is documented in\", \"object\": \"Wohler's Report 2014\"},\n {\"subject\": \"3D Printing\", \"predicate\": \"is documented in\", \"object\": \"Wohler's Report 2014\"},\n {\"subject\": \"Cutting\", \"predicate\": \"is a type of\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Subtractive Manufacturing\", \"predicate\": \"is a type of\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Forming\", \"predicate\": \"is a type of\", \"object\": \"Manufacturing Process\"},\n {\"subject\": \"Laser cutting\", \"predicate\": \"is a technique within\", \"object\": \"Cutting\"},\n {\"subject\": \"Carving\", \"predicate\": \"is a technique within\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Drilling\", \"predicate\": \"is a technique within\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Milling\", \"predicate\": \"is a technique within\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Chiseling\", \"predicate\": \"is a technique within\", \"object\": \"Subtractive Manufacturing\"},\n {\"subject\": \"Vacuum Forming\", \"predicate\": \"is a technique within\", \"object\": \"Forming\"},\n {\"subject\": \"Hydroforming\", \"predicate\": \"is a technique within\", \"object\": \"Forming\"},\n {\"subject\": \"Glass Blowing\", \"predicate\": \"is a technique within\", \"object\": \"Forming\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"is a technology used in\", \"object\": \"Additive Manufacturing\"},\n {\"subject\": \"FDM\", \"predicate\": \"is a technology used in\", \"object\": \"Additive Manufacturing\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"offers advantage 
of\", \"object\": \"Freedom\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"offers advantage of\", \"object\": \"Quick Production\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"offers advantage of\", \"object\": \"Multiple Materials, One Print\"},\n {\"subject\": \"Additive Manufacturing\", \"predicate\": \"offers advantage of\", \"object\": \"Closed System\"},\n {\"subject\": \"1st",
"error": null
}
},
{
"source": "01_NURBS Curves.docx",
"size_bucket": "medium",
"doc_chars_original": 1401,
"doc_chars_sent": 1401,
"truncated": false,
"condition_a": {
"input_tokens": 506,
"output_tokens": 1479,
"latency_s": 6.58,
"entity_count": 31,
"edge_count": 36,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"circle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"arc\", \"type\": \"Geometric Shape\"},\n {\"name\": \"curve\", \"type\": \"Geometric Shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Shape\"},\n {\"name\": \"solid\", \"type\": \"Geometric Shape\"},\n {\"name\": \"illustration\", \"type\": \"Process\"},\n {\"name\": \"animation\", \"type\": \"Process\"},\n {\"name\": \"manufacturing\", \"type\": \"Process\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"Cartesian space\", \"type\": \"Coordinate System\"},\n {\"name\": \"X axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Y axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Z axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Shape\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Shape\"},\n {\"name\": \"helix\", \"type\": \"Geometric Shape\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open curve\", \"type\": \"Curve Type\"},\n {\"name\": \"closed curve\", \"type\": \"Curve Type\"},\n {\"name\": \"planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"non-planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"single curve\", \"type\": \"Curve Category\"},\n {\"name\": \"polycurve\", \"type\": \"Curve Category\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": 
\"NURBS\", \"predicate\": \"are mathematical representations of\", \"object\": \"3-D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents a position in\", \"object\": \"Cartesian space\"},\n {\"subject\": \"position\", \"predicate\": \"is expressed along\", \"object\": \"X axis\"},\n {\"subject\": \"position\", \"predicate\": \"is expressed along\", \"object\": \"Y axis\"},\n {\"subject\": \"position\", \"predicate\": \"is expressed along\", \"object\": \"Z axis\"},\n {\"subject\": \"points\", \"predicate\": \"are used to define\", \"object\": \"2D geometry\"},\n {\"subject\": \"points\", \"predicate\": \"are used to define\", \"object\": \"3D geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"is a 2D geometric form in Rhino\", \"object\": \"Geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"line\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"circle\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", 
\"object\": \"arc\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"polygon\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"ellipse\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"helix\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"spiral\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"open curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": ",
"error": null
},
"condition_b": {
"local_latency_s": 14.08,
"local_candidates": [
"Rhino",
"Non-Uniform Rational B-Splines (NURBS)",
"X",
"Y",
"Z",
"Cartesian space",
"Point",
"Line",
"Curve",
"Arc",
"Circle",
"Polygon",
"Ellipse",
"Helix",
"Spiral"
],
"local_raw": "{\n \"candidates\": [\"Rhino\", \"Non-Uniform Rational B-Splines (NURBS)\", \"X\", \"Y\", \"Z\", \"Cartesian space\", \"Point\", \"Line\", \"Curve\", \"Arc\", \"Circle\", \"Polygon\", \"Ellipse\", \"Helix\", \"Spiral\"]\n}",
"api_input_tokens": 616,
"api_output_tokens": 991,
"api_latency_s": 5.02,
"entity_count": 18,
"edge_count": 23,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Non-Uniform Rational B-Splines (NURBS)\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"X\", \"type\": \"Axis\"},\n {\"name\": \"Y\", \"type\": \"Axis\"},\n {\"name\": \"Z\", \"type\": \"Axis\"},\n {\"name\": \"Cartesian space\", \"type\": \"Coordinate System\"},\n {\"name\": \"Point\", \"type\": \"Geometric Element\"},\n {\"name\": \"Line\", \"type\": \"Curve Type\"},\n {\"name\": \"Curve\", \"type\": \"Geometric Element\"},\n {\"name\": \"Arc\", \"type\": \"Curve Type\"},\n {\"name\": \"Circle\", \"type\": \"Curve Type\"},\n {\"name\": \"Polygon\", \"type\": \"Curve Type\"},\n {\"name\": \"Ellipse\", \"type\": \"Curve Type\"},\n {\"name\": \"Helix\", \"type\": \"Curve Type\"},\n {\"name\": \"Spiral\", \"type\": \"Curve Type\"},\n {\"name\": \"Surface\", \"type\": \"Geometric Element\"},\n {\"name\": \"Solid\", \"type\": \"Geometric Element\"},\n {\"name\": \"Polycurve\", \"type\": \"Geometric Element\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry using\", \"object\": \"Non-Uniform Rational B-Splines (NURBS)\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Line\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Circle\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Arc\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Curve\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Surface\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"Solid\"},\n {\"subject\": \"Point\", \"predicate\": \"represents position in\", \"object\": \"Cartesian 
space\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed along\", \"object\": \"X\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed along\", \"object\": \"Y\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed along\", \"object\": \"Z\"},\n {\"subject\": \"Point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Line\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Arc\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Circle\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Polygon\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Ellipse\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Helix\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Spiral\"},\n {\"subject\": \"Curve\", \"predicate\": \"are building blocks for\", \"object\": \"Surface\"},\n {\"subject\": \"Curve\", \"predicate\": \"are building blocks for\", \"object\": \"Solid\"},\n {\"subject\": \"Polycurve\", \"predicate\": \"is composed of multiple\", \"object\": \"Curve\"},\n {\"subject\": \"Curve\", \"predicate\": \"can be joined to\", \"object\": \"Curve\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "02_Point of Curves - AARON.docx",
"size_bucket": "medium",
"doc_chars_original": 2116,
"doc_chars_sent": 2116,
"truncated": false,
"condition_a": {
"input_tokens": 667,
"output_tokens": 2012,
"latency_s": 9.5,
"entity_count": 44,
"edge_count": 48,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"circle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"arc\", \"type\": \"Geometric Shape\"},\n {\"name\": \"curve\", \"type\": \"Geometric Shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Shape\"},\n {\"name\": \"solid\", \"type\": \"Geometric Shape\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"3D Cartesian space\", \"type\": \"Coordinate System\"},\n {\"name\": \"X-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Y-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Z-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Type\"},\n {\"name\": \"line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Shape\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Shape\"},\n {\"name\": \"helix\", \"type\": \"Geometric Shape\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open curve\", \"type\": \"Curve Type\"},\n {\"name\": \"closed curve\", \"type\": \"Curve Type\"},\n {\"name\": \"planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"non-planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"single curve\", \"type\": \"Curve Type\"},\n {\"name\": \"polycurve\", \"type\": \"Curve Type\"},\n {\"name\": \"polyline\", \"type\": \"Geometric Shape\"},\n {\"name\": \"illustration\", \"type\": \"Application\"},\n {\"name\": \"animation\", \"type\": \"Application\"},\n {\"name\": \"manufacturing\", \"type\": \"Application\"},\n {\"name\": \"small object template\", \"type\": \"Template\"},\n 
{\"name\": \"Top viewport\", \"type\": \"Viewport\"},\n {\"name\": \"Circle layer\", \"type\": \"Layer\"},\n {\"name\": \"Star layer\", \"type\": \"Layer\"},\n {\"name\": \"Rectangle layer\", \"type\": \"Layer\"},\n {\"name\": \"Polyline layer\", \"type\": \"Layer\"},\n {\"name\": \"Curve layer\", \"type\": \"Layer\"},\n {\"name\": \"Shape layer\", \"type\": \"Layer\"},\n {\"name\": \"Star\", \"type\": \"Geometric Shape\"},\n {\"name\": \"Rectangle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"Rectangle with rounded corners\", \"type\": \"Geometric Shape\"},\n {\"name\": \"Open Poly-line\", \"type\": \"Geometric Shape\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"is a\", \"object\": \"Mathematical Representation\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have\", \"object\": \"flexibility\"},\n {\"subject\": \"NURBS\", \"predicate\": \"have\", \"object\": \"accuracy\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents a 
position in\", \"object\": \"3D Cartesian space\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"X-axis\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Y-axis\"},\n {\"subject\": \"point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Z-axis\"},\n {\"subject\": \"points\", \"predicate\": \"are building blocks that define\", \"object\": \"2D geometry\"},\n {\"s",
"error": null
},
"condition_b": {
"local_latency_s": 18.77,
"local_candidates": [
"Rhino",
"NURBS",
"Point",
"X-axis",
"Y-axis",
"Z-axis",
"Curve",
"Line",
"Arc",
"Circle",
"Polygon",
"Ellipse",
"Helix",
"Spiral",
"Polycurve",
"Homework",
"small object template",
"Top viewport",
"Layer name",
"Circle",
"Star",
"Rectangle",
"Rectangle with rounded corners",
"Polyline",
"Open Poly-line",
"Closed Curve",
"Shape"
],
"local_raw": "{\n \"candidates\": [\"Rhino\", \"NURBS\", \"Point\", \"X-axis\", \"Y-axis\", \"Z-axis\", \"Curve\", \"Line\", \"Arc\", \"Circle\", \"Polygon\", \"Ellipse\", \"Helix\", \"Spiral\", \"Polycurve\", \"Homework\", \"small object template\", \"Top viewport\", \"Layer name\", \"Circle\", \"Star\", \"Rectangle\", \"Rectangle with rounded corners\", \"Polyline\", \"Open Poly-line\", \"Closed Curve\", \"Shape\"]\n}",
"api_input_tokens": 824,
"api_output_tokens": 1447,
"api_latency_s": 6.54,
"entity_count": 29,
"edge_count": 35,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Point\", \"type\": \"Geometric Element\"},\n {\"name\": \"X-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Y-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Z-axis\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"3D Cartesian space\", \"type\": \"Coordinate System\"},\n {\"name\": \"Curve\", \"type\": \"Geometric Entity\"},\n {\"name\": \"Line\", \"type\": \"Curve Type\"},\n {\"name\": \"Arc\", \"type\": \"Curve Type\"},\n {\"name\": \"Circle\", \"type\": \"Curve Type\"},\n {\"name\": \"Polygon\", \"type\": \"Curve Type\"},\n {\"name\": \"Ellipse\", \"type\": \"Curve Type\"},\n {\"name\": \"Helix\", \"type\": \"Curve Type\"},\n {\"name\": \"Spiral\", \"type\": \"Curve Type\"},\n {\"name\": \"Polycurve\", \"type\": \"Geometric Entity\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometry Type\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometry Type\"},\n {\"name\": \"Polyline\", \"type\": \"Geometric Entity\"},\n {\"name\": \"Open Poly-line\", \"type\": \"Polyline Type\"},\n {\"name\": \"Closed Curve\", \"type\": \"Curve Type\"},\n {\"name\": \"Homework 1\", \"type\": \"Assignment\"},\n {\"name\": \"small object template\", \"type\": \"Document Template\"},\n {\"name\": \"Top viewport\", \"type\": \"Viewport\"},\n {\"name\": \"Layer\", \"type\": \"Document Organization\"},\n {\"name\": \"Rectangle\", \"type\": \"Curve Type\"},\n {\"name\": \"Rectangle with rounded corners\", \"type\": \"Curve Type\"},\n {\"name\": \"Star\", \"type\": \"Shape\"},\n {\"name\": \"Shape\", \"type\": \"Geometric Entity\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", \"object\": \"2D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can describe\", 
\"object\": \"3D geometry\"},\n {\"subject\": \"Point\", \"predicate\": \"represents position in\", \"object\": \"3D Cartesian space\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"X-axis\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Y-axis\"},\n {\"subject\": \"Point\", \"predicate\": \"is expressed as coordinates along\", \"object\": \"Z-axis\"},\n {\"subject\": \"Point\", \"predicate\": \"is basic building block for\", \"object\": \"2D geometry\"},\n {\"subject\": \"Point\", \"predicate\": \"is basic building block for\", \"object\": \"3D geometry\"},\n {\"subject\": \"Curve\", \"predicate\": \"is basic geometric entity in\", \"object\": \"Rhino\"},\n {\"subject\": \"Curve\", \"predicate\": \"is generalization of\", \"object\": \"Line\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Line\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Arc\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Circle\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Polygon\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Ellipse\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Helix\"},\n {\"subject\": \"Curve\", \"predicate\": \"includes\", \"object\": \"Spiral\"},\n {\"subject\": \"Curve\", \"predicate\": \"is building block for\", \"object\": \"complex geometries\"},\n {\"subject\": \"Polycurve\", \"predicate\": \"is composed of\", \"object\": \"Curve\"},\n {\"subject\": \"Polycurve\", \"predicate\": \"joins curves at\", \"object\": \"end points\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Curve\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Line\"},\n {\"subject\": \"2D geometry\", \"predicate\": \"is comprised of\", \"object\": \"Polyline\"},\n 
{\"subject\": \"Homework 1\", \"predicate\": \"requires creating document with\", \"object\": \"small object template\"},\n {\"subject\": \"Homework 1\", \"predicate\": \"requires working in\", \"object\": \"Top viewport\"},\n {\"subject\": \"Homework 1\", \"predicate\": \"requires creating\", \"object\": \"Circle\"},\n {\"subject\": \"Homework 1\", \"predicate\": \"requires creating\", \"object\": \"Star\"},\n {\"subject\": \"Homework 1\", \"predicate\": \"requires creating\", \"object\": \"Re",
"error": null
}
},
{
"source": "02_Point of Curves.docx",
"size_bucket": "medium",
"doc_chars_original": 1338,
"doc_chars_sent": 1338,
"truncated": false,
"condition_a": {
"input_tokens": 491,
"output_tokens": 1392,
"latency_s": 6.59,
"entity_count": 30,
"edge_count": 34,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"Geometry\", \"type\": \"Concept\"},\n {\"name\": \"3-D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"2-D line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"circle\", \"type\": \"Geometric Shape\"},\n {\"name\": \"arc\", \"type\": \"Geometric Shape\"},\n {\"name\": \"curve\", \"type\": \"Geometric Shape\"},\n {\"name\": \"3-D organic free-form surface\", \"type\": \"Geometric Shape\"},\n {\"name\": \"solid\", \"type\": \"Geometric Shape\"},\n {\"name\": \"illustration\", \"type\": \"Application\"},\n {\"name\": \"animation\", \"type\": \"Application\"},\n {\"name\": \"manufacturing\", \"type\": \"Application\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"area\", \"type\": \"Property\"},\n {\"name\": \"volume\", \"type\": \"Property\"},\n {\"name\": \"XYZ intersection\", \"type\": \"Coordinate System\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometric Form\"},\n {\"name\": \"line\", \"type\": \"Geometric Shape\"},\n {\"name\": \"polygon\", \"type\": \"Geometric Shape\"},\n {\"name\": \"ellipse\", \"type\": \"Geometric Shape\"},\n {\"name\": \"helix\", \"type\": \"Geometric Shape\"},\n {\"name\": \"spiral\", \"type\": \"Geometric Shape\"},\n {\"name\": \"open curve\", \"type\": \"Curve Type\"},\n {\"name\": \"closed curve\", \"type\": \"Curve Type\"},\n {\"name\": \"planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"non-planar curve\", \"type\": \"Curve Type\"},\n {\"name\": \"single curve\", \"type\": \"Curve Category\"},\n {\"name\": \"polycurve\", \"type\": \"Curve Category\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry with\", \"object\": \"NURBS\"},\n {\"subject\": \"NURBS\", \"predicate\": \"are mathematical representations 
of\", \"object\": \"3-D geometry\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"2-D line\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"circle\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"arc\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"curve\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"3-D organic free-form surface\"},\n {\"subject\": \"NURBS\", \"predicate\": \"can accurately describe\", \"object\": \"solid\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"illustration\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"animation\"},\n {\"subject\": \"NURBS models\", \"predicate\": \"can be used in\", \"object\": \"manufacturing\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"XYZ intersection\"},\n {\"subject\": \"point\", \"predicate\": \"has no\", \"object\": \"area\"},\n {\"subject\": \"point\", \"predicate\": \"has no\", \"object\": \"volume\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"2D geometry\"},\n {\"subject\": \"point\", \"predicate\": \"is used to define\", \"object\": \"3D geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"is a 2D geometric form in Rhino\", \"object\": \"Geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"line\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"circle\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"arc\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"polygon\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"ellipse\"},\n {\"subject\": 
\"curve\", \"predicate\": \"includes\", \"object\": \"helix\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"spiral\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"open curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"closed curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"planar curve\"},\n {\"subject\": \"curve\", \"predicate\": \"can be\", \"object\": \"non-planar curve\"},\n {\"subject\": ",
"error": null
},
"condition_b": {
"local_latency_s": 10.39,
"local_candidates": [
"Rhino",
"Non-Uniform Rational B-Splines (NURBS)",
"point",
"X",
"Y",
"Z",
"line",
"curve",
"arc",
"circle",
"polygon",
"ellipse",
"helices",
"spirals",
"polycurve"
],
"local_raw": "{\n \"candidates\": [\"Rhino\", \"Non-Uniform Rational B-Splines (NURBS)\", \"point\", \"X\", \"Y\", \"Z\", \"line\", \"curve\", \"arc\", \"circle\", \"polygon\", \"ellipse\", \"helices\", \"spirals\", \"polycurve\"]\n}",
"api_input_tokens": 597,
"api_output_tokens": 1015,
"api_latency_s": 5.03,
"entity_count": 19,
"edge_count": 24,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Non-Uniform Rational B-Splines (NURBS)\", \"type\": \"Mathematical Representation\"},\n {\"name\": \"point\", \"type\": \"Geometric Element\"},\n {\"name\": \"X\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Y\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"Z\", \"type\": \"Coordinate Axis\"},\n {\"name\": \"line\", \"type\": \"Curve Type\"},\n {\"name\": \"curve\", \"type\": \"Geometric Element\"},\n {\"name\": \"arc\", \"type\": \"Curve Type\"},\n {\"name\": \"circle\", \"type\": \"Curve Type\"},\n {\"name\": \"polygon\", \"type\": \"Curve Type\"},\n {\"name\": \"ellipse\", \"type\": \"Curve Type\"},\n {\"name\": \"helix\", \"type\": \"Curve Type\"},\n {\"name\": \"spiral\", \"type\": \"Curve Type\"},\n {\"name\": \"polycurve\", \"type\": \"Geometric Element\"},\n {\"name\": \"2D geometry\", \"type\": \"Geometry Type\"},\n {\"name\": \"3D geometry\", \"type\": \"Geometry Type\"},\n {\"name\": \"surface\", \"type\": \"Geometric Element\"},\n {\"name\": \"solid\", \"type\": \"Geometric Element\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"represents all geometry using\", \"object\": \"Non-Uniform Rational B-Splines (NURBS)\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"line\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"circle\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"arc\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"curve\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"surface\"},\n {\"subject\": \"Non-Uniform Rational B-Splines (NURBS)\", \"predicate\": \"can describe\", \"object\": \"solid\"},\n {\"subject\": 
\"point\", \"predicate\": \"represents\", \"object\": \"X\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"Y\"},\n {\"subject\": \"point\", \"predicate\": \"represents\", \"object\": \"Z\"},\n {\"subject\": \"point\", \"predicate\": \"is the most basic element in\", \"object\": \"Rhino\"},\n {\"subject\": \"point\", \"predicate\": \"used to define\", \"object\": \"2D geometry\"},\n {\"subject\": \"point\", \"predicate\": \"used to define\", \"object\": \"3D geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"line\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"arc\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"circle\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"polygon\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"ellipse\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"helix\"},\n {\"subject\": \"curve\", \"predicate\": \"includes\", \"object\": \"spiral\"},\n {\"subject\": \"curve\", \"predicate\": \"is a building block for\", \"object\": \"3D geometry\"},\n {\"subject\": \"polycurve\", \"predicate\": \"is composed of multiple\", \"object\": \"curve\"},\n {\"subject\": \"polycurve\", \"predicate\": \"is a type of\", \"object\": \"2D geometry\"},\n {\"subject\": \"curve\", \"predicate\": \"is a type of\", \"object\": \"2D geometry\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "02_PPT_ALL_AM_Technologies_for_3DP_v3.pptx",
"size_bucket": "large",
"doc_chars_original": 9360,
"doc_chars_sent": 8000,
"truncated": true,
"condition_a": {
"input_tokens": 2398,
"output_tokens": 4096,
"latency_s": 18.21,
"entity_count": null,
"edge_count": null,
"stop_reason": "max_tokens",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"Technology Domain\"},\n {\"name\": \"ASTM\", \"type\": \"Organization\"},\n {\"name\": \"Material Extrusion\", \"type\": \"AM Process\"},\n {\"name\": \"FDM\", \"type\": \"AM Technology\"},\n {\"name\": \"Fused Deposition Modeling\", \"type\": \"AM Technology\"},\n {\"name\": \"FFF\", \"type\": \"AM Technology\"},\n {\"name\": \"Fused Filament Fabrication\", \"type\": \"AM Technology\"},\n {\"name\": \"Vat Photopolymerization\", \"type\": \"AM Process\"},\n {\"name\": \"SL\", \"type\": \"AM Technology\"},\n {\"name\": \"SLA\", \"type\": \"AM Technology\"},\n {\"name\": \"Stereolithography\", \"type\": \"AM Technology\"},\n {\"name\": \"DLP\", \"type\": \"AM Technology\"},\n {\"name\": \"Digital Light Processing\", \"type\": \"AM Technology\"},\n {\"name\": \"3SP\", \"type\": \"AM Technology\"},\n {\"name\": \"Powder Bed Fusion\", \"type\": \"AM Process\"},\n {\"name\": \"SLS\", \"type\": \"AM Technology\"},\n {\"name\": \"Selective Laser Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"DMLS\", \"type\": \"AM Technology\"},\n {\"name\": \"Direct Metal Laser Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"EBM\", \"type\": \"AM Technology\"},\n {\"name\": \"Electron Beam Melting\", \"type\": \"AM Technology\"},\n {\"name\": \"SHS\", \"type\": \"AM Technology\"},\n {\"name\": \"Selective Heat Sintering\", \"type\": \"AM Technology\"},\n {\"name\": \"Binder Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"CJP\", \"type\": \"AM Technology\"},\n {\"name\": \"ColorJet Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"PP\", \"type\": \"AM Technology\"},\n {\"name\": \"Plaster-based 3D Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"Sheet Lamination\", \"type\": \"AM Process\"},\n {\"name\": \"UC\", \"type\": \"AM Technology\"},\n {\"name\": \"Ultrasonic Consolidation\", \"type\": \"AM Technology\"},\n {\"name\": \"LOM\", \"type\": \"AM 
Technology\"},\n {\"name\": \"Laminated Object Manufacturing\", \"type\": \"AM Technology\"},\n {\"name\": \"Directed Energy Deposition\", \"type\": \"AM Process\"},\n {\"name\": \"LMD\", \"type\": \"AM Technology\"},\n {\"name\": \"Laser Metal Deposition\", \"type\": \"AM Technology\"},\n {\"name\": \"Material Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"MJP\", \"type\": \"AM Technology\"},\n {\"name\": \"MultiJet Printing\", \"type\": \"AM Technology\"},\n {\"name\": \"PJ\", \"type\": \"AM Technology\"},\n {\"name\": \"PolyJet\", \"type\": \"AM Technology\"},\n {\"name\": \"Photopolymer Jetting\", \"type\": \"AM Technology\"},\n {\"name\": \"LM\", \"type\": \"AM Technology\"},\n {\"name\": \"Laser Melting\", \"type\": \"AM Technology\"},\n {\"name\": \"SLM\", \"type\": \"AM Technology\"},\n {\"name\": \"Selective Laser Melting\", \"type\": \"AM Technology\"},\n {\"name\": \"BJ\", \"type\": \"AM Process\"},\n {\"name\": \"MJ\", \"type\": \"AM Process\"},\n {\"name\": \"3D Printing\", \"type\": \"Manufacturing Process\"},\n {\"name\": \"Prototypes\", \"type\": \"Application\"},\n {\"name\": \"Manufacturing Aides\", \"type\": \"Application\"},\n {\"name\": \"Small series parts\", \"type\": \"Application\"},\n {\"name\": \"Casting patterns\", \"type\": \"Application\"},\n {\"name\": \"Support parts\", \"type\": \"Application\"},\n {\"name\": \"Tools for injection molds\", \"type\": \"Application\"},\n {\"name\": \"Green parts\", \"type\": \"Application\"},\n {\"name\": \"Molds and cores\", \"type\": \"Application\"},\n {\"name\": \"Lost wax casting\", \"type\": \"Application\"},\n {\"name\": \"Jewelry\", \"type\": \"Application\"},\n {\"name\": \"Dental\", \"type\": \"Application\"},\n {\"name\": \"Photopolymers\", \"type\": \"Material\"},\n {\"name\": \"Plastics\", \"type\": \"Material\"},\n {\"name\": \"Metals\", \"type\": \"Material\"},\n {\"name\": \"Thermoplastics\", \"type\": \"Material\"},\n {\"name\": \"Powder\", \"type\": \"Material Form\"},\n 
{\"name\": \"UV-active photopolymers\", \"type\": \"Material\"},\n {\"name\": \"Nylon\", \"type\": \"Material\"},\n {\"name\": \"Wax-like materials\", \"type\": \"Material\"},\n {\"name\": \"Anisotropy\", \"type\": \"Property\"},\n {\"name\": \"Mechanical properties\", \"type\": \"Property\"},\n {\"name\": \"Surface finishes\", \"type\": \"Property\"},\n {\"name\": \"Accuracy\", \"type\": \"Property\"},\n {\"name\": \"Density\", \"type\": \"Property\"},\n {\"name",
"error": null
},
"condition_b": {
"local_latency_s": 100.44,
"local_candidates": [
"Additive Manufacturing",
"AM",
"ASTM",
"American Society for Testing Materials",
"Material Extrusion",
"FDM",
"Fused Deposition Modeling",
"FFF",
"Fused Filament Fabrication",
"Vat Photopolymerization",
"SL",
"Stereolithography",
"DLP",
"Digital Light Processing",
"Powder Bed Fusion",
"SLS",
"Direct Metal Laser Sintering",
"EBM",
"Electron Beam Melting",
"SHS",
"Selective Heat Sintering",
"Binder Jetting",
"CJP",
"ColorJet Printing",
"PP",
"Plaster-based 3D Printing",
"Sheet Lamination",
"UC",
"Ultrasonic Consolidation",
"LOM",
"Laminated Object Manufacturing"
],
"local_raw": "{\n \"candidates\": [\n \"Additive Manufacturing\",\n \"AM\",\n \"ASTM\",\n \"American Society for Testing Materials\",\n \"Material Extrusion\",\n \"FDM\",\n \"Fused Deposition Modeling\",\n \"FFF\",\n \"Fused Filament Fabrication\",\n \"Vat Photopolymerization\",\n \"SL\",\n \"Stereolithography\",\n \"DLP\",\n \"Digital Light Processing\",\n \"Powder Bed Fusion\",\n \"SLS\",\n \"Direct Metal Laser Sintering\",\n \"EBM\",\n \"Electron Beam Melting\",\n \"SHS\",\n \"Selective Heat Sintering\",\n \"Binder Jetting\",\n \"CJP\",\n \"ColorJet Printing\",\n \"PP\",\n \"Plaster-based 3D Printing\",\n \"Sheet Lamination\",\n \"UC\",\n \"Ultrasonic Consolidation\",\n \"LOM\",\n \"Laminated Object Manufacturing\"\n ]\n}",
"api_input_tokens": 2635,
"api_output_tokens": 4096,
"api_latency_s": 18.61,
"entity_count": null,
"edge_count": null,
"stop_reason": "max_tokens",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Additive Manufacturing\", \"type\": \"Technology Category\"},\n {\"name\": \"AM\", \"type\": \"Abbreviation\"},\n {\"name\": \"ASTM\", \"type\": \"Organization\"},\n {\"name\": \"American Society for Testing Materials\", \"type\": \"Organization\"},\n {\"name\": \"Material Extrusion\", \"type\": \"AM Process\"},\n {\"name\": \"FDM\", \"type\": \"Technology\"},\n {\"name\": \"Fused Deposition Modeling\", \"type\": \"Technology\"},\n {\"name\": \"FFF\", \"type\": \"Technology\"},\n {\"name\": \"Fused Filament Fabrication\", \"type\": \"Technology\"},\n {\"name\": \"Vat Photopolymerization\", \"type\": \"AM Process\"},\n {\"name\": \"SL\", \"type\": \"Technology\"},\n {\"name\": \"SLA\", \"type\": \"Technology\"},\n {\"name\": \"Stereolithography\", \"type\": \"Technology\"},\n {\"name\": \"DLP\", \"type\": \"Technology\"},\n {\"name\": \"Digital Light Processing\", \"type\": \"Technology\"},\n {\"name\": \"3SP\", \"type\": \"Technology\"},\n {\"name\": \"Scan, Spin, & Selectively Photocure\", \"type\": \"Technology\"},\n {\"name\": \"Powder Bed Fusion\", \"type\": \"AM Process\"},\n {\"name\": \"SLS\", \"type\": \"Technology\"},\n {\"name\": \"Selective Laser Sintering\", \"type\": \"Technology\"},\n {\"name\": \"DMLS\", \"type\": \"Technology\"},\n {\"name\": \"Direct Metal Laser Sintering\", \"type\": \"Technology\"},\n {\"name\": \"EBM\", \"type\": \"Technology\"},\n {\"name\": \"Electron Beam Melting\", \"type\": \"Technology\"},\n {\"name\": \"SHS\", \"type\": \"Technology\"},\n {\"name\": \"Selective Heat Sintering\", \"type\": \"Technology\"},\n {\"name\": \"LM\", \"type\": \"Technology\"},\n {\"name\": \"Laser Melting\", \"type\": \"Technology\"},\n {\"name\": \"SLM\", \"type\": \"Technology\"},\n {\"name\": \"Binder Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"BJ\", \"type\": \"Abbreviation\"},\n {\"name\": \"CJP\", \"type\": \"Technology\"},\n {\"name\": \"ColorJet Printing\", \"type\": 
\"Technology\"},\n {\"name\": \"PP\", \"type\": \"Technology\"},\n {\"name\": \"Plaster-based 3D Printing\", \"type\": \"Technology\"},\n {\"name\": \"Sheet Lamination\", \"type\": \"AM Process\"},\n {\"name\": \"UC\", \"type\": \"Technology\"},\n {\"name\": \"Ultrasonic Consolidation\", \"type\": \"Technology\"},\n {\"name\": \"LOM\", \"type\": \"Technology\"},\n {\"name\": \"Laminated Object Manufacturing\", \"type\": \"Technology\"},\n {\"name\": \"Directed Energy Deposition\", \"type\": \"AM Process\"},\n {\"name\": \"LMD\", \"type\": \"Technology\"},\n {\"name\": \"Laser Metal Deposition\", \"type\": \"Technology\"},\n {\"name\": \"Material Jetting\", \"type\": \"AM Process\"},\n {\"name\": \"MJ\", \"type\": \"Abbreviation\"},\n {\"name\": \"MJP\", \"type\": \"Technology\"},\n {\"name\": \"PJ\", \"type\": \"Abbreviation\"},\n {\"name\": \"PolyJet\", \"type\": \"Technology\"},\n {\"name\": \"MultiJet Printing\", \"type\": \"Technology\"},\n {\"name\": \"Photopolymer Jetting\", \"type\": \"Technology\"}\n ],\n \"edges\": [\n {\"subject\": \"ASTM\", \"predicate\": \"defines\", \"object\": \"Additive Manufacturing\"},\n {\"subject\": \"American Society for Testing Materials\", \"predicate\": \"is also known as\", \"object\": \"ASTM\"},\n {\"subject\": \"Material Extrusion\", \"predicate\": \"is an AM process defined by\", \"object\": \"ASTM\"},\n {\"subject\": \"FDM\", \"predicate\": \"is a technology within\", \"object\": \"Material Extrusion\"},\n {\"subject\": \"Fused Deposition Modeling\", \"predicate\": \"is also known as\", \"object\": \"FDM\"},\n {\"subject\": \"FFF\", \"predicate\": \"is also known as\", \"object\": \"FDM\"},\n {\"subject\": \"Fused Filament Fabrication\", \"predicate\": \"is also known as\", \"object\": \"FDM\"},\n {\"subject\": \"Vat Photopolymerization\", \"predicate\": \"is an AM process defined by\", \"object\": \"ASTM\"},\n {\"subject\": \"SL\", \"predicate\": \"is a technology within\", \"object\": \"Vat Photopolymerization\"},\n 
{\"subject\": \"SLA\", \"predicate\": \"is a technology within\", \"object\": \"Vat Photopolymerization\"},\n {\"subject\": \"Stereolithography\", \"predicate\": \"is also known as\", \"object\": \"SL\"},\n {\"subject\": \"Stereolithography\", \"predicate\": \"is also known as\", \"object\": \"SLA\"},\n {\"subject\": \"DLP\", \"predicate\": \"is a technology within\", \"object\": \"Vat Photopolymerization\"},\n {\"subject\": \"Digital Light Processing\", \"",
"error": null
}
},
{
"source": "04_ALL_Materials and Their Properties_v3.pptx",
"size_bucket": "large",
"doc_chars_original": 25692,
"doc_chars_sent": 8000,
"truncated": true,
"condition_a": {
"input_tokens": 2237,
"output_tokens": 2692,
"latency_s": 13.67,
"entity_count": 57,
"edge_count": 53,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"PolyJet\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"FDM\", \"type\": \"3D Printing Technology\"},\n {\"name\": \"PLA\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS-PLUS\", \"type\": \"FDM Material\"},\n {\"name\": \"ABSM30\", \"type\": \"FDM Material\"},\n {\"name\": \"ABSI\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS M30I\", \"type\": \"FDM Material\"},\n {\"name\": \"ABS-ESD7\", \"type\": \"FDM Material\"},\n {\"name\": \"ASA\", \"type\": \"FDM Material\"},\n {\"name\": \"NYLON 12\", \"type\": \"FDM Material\"},\n {\"name\": \"NYLON 6\", \"type\": \"FDM Material\"},\n {\"name\": \"PC-ABS\", \"type\": \"FDM Material\"},\n {\"name\": \"PC-ISO\", \"type\": \"FDM Material\"},\n {\"name\": \"ULTEM 9085\", \"type\": \"FDM Material\"},\n {\"name\": \"ULTEM 1010\", \"type\": \"FDM Material\"},\n {\"name\": \"Veros\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Rigur\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Durus\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Tango\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Agilus\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Digital ABS\", \"type\": \"PolyJet Material\"},\n {\"name\": \"Thermoplastics\", \"type\": \"Material Category\"},\n {\"name\": \"SR support structures\", \"type\": \"Support Structure Type\"},\n {\"name\": \"BASS support structures\", \"type\": \"Support Structure Type\"},\n {\"name\": \"Thermal Resistance\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Tensile Strength\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Flexural Strength\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"IZOD Impact\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Coefficient of Thermal Expansion\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Electrical Properties\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Water 
Absorption\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Shore Hardness\", \"type\": \"ASTM Testing Standard\"},\n {\"name\": \"Heat Deflection Temperature\", \"type\": \"Material Property\"},\n {\"name\": \"Tensile Modulus\", \"type\": \"Material Property\"},\n {\"name\": \"Elongation to Break\", \"type\": \"Material Property\"},\n {\"name\": \"Toughness\", \"type\": \"Material Property\"},\n {\"name\": \"Flexural Modulus\", \"type\": \"Material Property\"},\n {\"name\": \"Static Dissipative\", \"type\": \"Electrical Property\"},\n {\"name\": \"Surface Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Volume Resistance\", \"type\": \"Electrical Property\"},\n {\"name\": \"Shore A\", \"type\": \"Hardness Scale\"},\n {\"name\": \"Shore D\", \"type\": \"Hardness Scale\"},\n {\"name\": \"Materials Data Sheet\", \"type\": \"Document Type\"},\n {\"name\": \"Safety Data Sheet\", \"type\": \"Document Type\"},\n {\"name\": \"SDS\", \"type\": \"Document Type\"},\n {\"name\": \"REACH\", \"type\": \"Regulation\"},\n {\"name\": \"Global Harmonized System\", \"type\": \"Standard\"},\n {\"name\": \"GHS\", \"type\": \"Standard\"},\n {\"name\": \"Rigid Opaque\", \"type\": \"PolyJet Material Family\"},\n {\"name\": \"Rigid Transparent\", \"type\": \"PolyJet Material Family\"},\n {\"name\": \"Simulated Polypropylene Family\", \"type\": \"PolyJet Material Family\"},\n {\"name\": \"Flexible Rubber-like Family\", \"type\": \"PolyJet Material Family\"},\n {\"name\": \"Digital Material\", \"type\": \"PolyJet Material Family\"},\n {\"name\": \"Standard FDM Materials\", \"type\": \"FDM Material Category\"},\n {\"name\": \"Engineering FDM Materials\", \"type\": \"FDM Material Category\"},\n {\"name\": \"High Performance FDM Materials\", \"type\": \"FDM Material Category\"}\n ],\n \"edges\": [\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"PolyJet\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": 
\"FDM\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"is a\", \"object\": \"3D Printing Technology\"},\n {\"subject\": \"FDM\", \"predicate\": \"is a\", \"object\": \"3D Printing Technology\"},\n {\"subject\": \"PLA\", \"predicate\": \"is a\", \"object\": \"Standard FDM Materials\"},\n {\"subject\": \"ABS-PLUS\", \"predicate\": \"is a\", \"object\": \"Standard FDM Materials\"},\n {\"subject\": \"ABSM30\", \"predicate\": \"is a\", \"object\": \"Stand",
"error": null
},
"condition_b": {
"skipped": "local_model_failed",
"local_error": "HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=120)",
"local_latency_s": 120.1
}
},
{
"source": "05_Entering the third dimension.docx",
"size_bucket": "medium",
"doc_chars_original": 2175,
"doc_chars_sent": 2175,
"truncated": false,
"condition_a": {
"input_tokens": 672,
"output_tokens": 998,
"latency_s": 5.16,
"entity_count": 22,
"edge_count": 22,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"Surfaces\", \"type\": \"Geometric Component\"},\n {\"name\": \"NURBS objects\", \"type\": \"Geometric Object\"},\n {\"name\": \"Fabric\", \"type\": \"Analogy\"},\n {\"name\": \"Edge curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Curves\", \"type\": \"Geometric Element\"},\n {\"name\": \"Seams\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Border\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Open cylinder\", \"type\": \"Surface Type\"},\n {\"name\": \"Torus\", \"type\": \"Surface Type\"},\n {\"name\": \"Isoparametric curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Isocurves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"U curves\", \"type\": \"Curve Direction\"},\n {\"name\": \"V curves\", \"type\": \"Curve Direction\"},\n {\"name\": \"Normal direction\", \"type\": \"Surface Property\"},\n {\"name\": \"Construction plane\", \"type\": \"Reference System\"},\n {\"name\": \"X and Y\", \"type\": \"Coordinate System\"},\n {\"name\": \"Rhino Options\", \"type\": \"Software Menu\"},\n {\"name\": \"Display Modes\", \"type\": \"Software Setting\"},\n {\"name\": \"Shaded\", \"type\": \"Display Mode\"},\n {\"name\": \"Back face settings\", \"type\": \"Software Setting\"},\n {\"name\": \"Lightweight extrusions\", \"type\": \"Surface Type\"}\n ],\n \"edges\": [\n {\"subject\": \"Surfaces\", \"predicate\": \"are the building blocks for\", \"object\": \"NURBS objects\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"have\", \"object\": \"rectangular organization\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"are compared to\", \"object\": \"Fabric\"},\n {\"subject\": \"Edge curves\", \"predicate\": \"describe the extent of\", \"object\": \"Surfaces\"},\n {\"subject\": \"Edge curves\", \"predicate\": \"are visually represented by\", \"object\": \"dark/thick lines\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"can only be 
joined at\", \"object\": \"Edge curves\"},\n {\"subject\": \"Curves\", \"predicate\": \"can only be joined at\", \"object\": \"end points\"},\n {\"subject\": \"Seams\", \"predicate\": \"are formed when\", \"object\": \"Edge curves are joined\"},\n {\"subject\": \"Border\", \"predicate\": \"consists of\", \"object\": \"un-joined edges\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"can be\", \"object\": \"open or closed\"},\n {\"subject\": \"Open cylinder\", \"predicate\": \"is closed in\", \"object\": \"one direction\"},\n {\"subject\": \"Torus\", \"predicate\": \"is closed in\", \"object\": \"both directions\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"are visual aids for\", \"object\": \"Surfaces\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"are lighter than\", \"object\": \"Edge curves\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"have at least\", \"object\": \"two isocurves\"},\n {\"subject\": \"U curves\", \"predicate\": \"run in one direction\", \"object\": \"Surfaces\"},\n {\"subject\": \"V curves\", \"predicate\": \"run perpendicular to\", \"object\": \"U curves\"},\n {\"subject\": \"U and V\", \"predicate\": \"are used instead of\", \"object\": \"X and Y\"},\n {\"subject\": \"Surfaces\", \"predicate\": \"have\", \"object\": \"Normal direction\"},\n {\"subject\": \"Normal direction\", \"predicate\": \"indicates\", \"object\": \"inside or outside\"},\n {\"subject\": \"Display Modes\", \"predicate\": \"can be accessed through\", \"object\": \"Rhino Options\"},\n {\"subject\": \"Back face settings\", \"predicate\": \"is a feature of\", \"object\": \"Display Modes\"}\n ]\n}\n```",
"error": null
},
"condition_b": {
"local_latency_s": 17.22,
"local_candidates": [
"Rhino",
"NURBS objects",
"surfaces",
"Edge curves",
"Seams",
"Border",
"cylinder",
"torus",
"Isoparametric curves",
"Isocurves",
"U curves",
"V curves"
],
"local_raw": "{\n \"candidates\": [\"Rhino\", \"NURBS objects\", \"surfaces\", \"Edge curves\", \"Seams\", \"Border\", \"cylinder\", \"torus\", \"Isoparametric curves\", \"Isocurves\", \"U curves\", \"V curves\"]\n}",
"api_input_tokens": 767,
"api_output_tokens": 792,
"api_latency_s": 3.85,
"entity_count": 14,
"edge_count": 20,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Rhino\", \"type\": \"Software\"},\n {\"name\": \"NURBS objects\", \"type\": \"Geometric Object\"},\n {\"name\": \"surfaces\", \"type\": \"Geometric Object\"},\n {\"name\": \"Edge curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Seams\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Border\", \"type\": \"Geometric Feature\"},\n {\"name\": \"cylinder\", \"type\": \"Geometric Shape\"},\n {\"name\": \"torus\", \"type\": \"Geometric Shape\"},\n {\"name\": \"Isoparametric curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Isocurves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"U curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"V curves\", \"type\": \"Geometric Feature\"},\n {\"name\": \"Normal direction\", \"type\": \"Geometric Property\"},\n {\"name\": \"rectangular organization\", \"type\": \"Geometric Property\"}\n ],\n \"edges\": [\n {\"subject\": \"Rhino\", \"predicate\": \"uses\", \"object\": \"NURBS objects\"},\n {\"subject\": \"NURBS objects\", \"predicate\": \"are composed of\", \"object\": \"surfaces\"},\n {\"subject\": \"surfaces\", \"predicate\": \"have\", \"object\": \"rectangular organization\"},\n {\"subject\": \"surfaces\", \"predicate\": \"are bounded by\", \"object\": \"Edge curves\"},\n {\"subject\": \"Edge curves\", \"predicate\": \"are visually represented as\", \"object\": \"dark/thick lines\"},\n {\"subject\": \"surfaces\", \"predicate\": \"can be joined at\", \"object\": \"Edge curves\"},\n {\"subject\": \"Edge curves\", \"predicate\": \"become\", \"object\": \"Seams\"},\n {\"subject\": \"Seams\", \"predicate\": \"are\", \"object\": \"joined edges\"},\n {\"subject\": \"Border\", \"predicate\": \"consists of\", \"object\": \"un-joined edges\"},\n {\"subject\": \"cylinder\", \"predicate\": \"is\", \"object\": \"open or closed\"},\n {\"subject\": \"torus\", \"predicate\": \"is closed in\", \"object\": \"both directions\"},\n {\"subject\": \"surfaces\", 
\"predicate\": \"contain\", \"object\": \"Isoparametric curves\"},\n {\"subject\": \"Isoparametric curves\", \"predicate\": \"are also called\", \"object\": \"Isocurves\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"are perpendicular to\", \"object\": \"one another\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"are lighter than\", \"object\": \"Edge curves\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"consist of\", \"object\": \"U curves\"},\n {\"subject\": \"Isocurves\", \"predicate\": \"consist of\", \"object\": \"V curves\"},\n {\"subject\": \"U curves\", \"predicate\": \"run perpendicular to\", \"object\": \"V curves\"},\n {\"subject\": \"surfaces\", \"predicate\": \"have\", \"object\": \"Normal direction\"},\n {\"subject\": \"Normal direction\", \"predicate\": \"indicates\", \"object\": \"inside or outside\"}\n ]\n}\n```",
"error": null
}
},
{
"source": "05_PPT_ALL_Machine Technology and Specifications_v3.pptx",
"size_bucket": "large",
"doc_chars_original": 29095,
"doc_chars_sent": 8000,
"truncated": true,
"condition_a": {
"input_tokens": 2207,
"output_tokens": 1840,
"latency_s": 9.13,
"entity_count": 30,
"edge_count": 46,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"FDM\", \"type\": \"Technology\"},\n {\"name\": \"PolyJet\", \"type\": \"Technology\"},\n {\"name\": \"F123 series\", \"type\": \"Printer\"},\n {\"name\": \"F170\", \"type\": \"Printer\"},\n {\"name\": \"F270\", \"type\": \"Printer\"},\n {\"name\": \"F370\", \"type\": \"Printer\"},\n {\"name\": \"Fortus 450MC\", \"type\": \"Printer\"},\n {\"name\": \"Fortus 380CF\", \"type\": \"Printer\"},\n {\"name\": \"J750\", \"type\": \"Printer\"},\n {\"name\": \"ABS\", \"type\": \"Material\"},\n {\"name\": \"PLA\", \"type\": \"Material\"},\n {\"name\": \"ASA\", \"type\": \"Material\"},\n {\"name\": \"PC-ABS\", \"type\": \"Material\"},\n {\"name\": \"QSR Support\", \"type\": \"Material\"},\n {\"name\": \"GrabCAD Print\", \"type\": \"Software\"},\n {\"name\": \"Insight\", \"type\": \"Software\"},\n {\"name\": \"SCA-3600 Cleaning Station\", \"type\": \"Equipment\"},\n {\"name\": \"SCA 1200 Cleaning Station\", \"type\": \"Equipment\"},\n {\"name\": \"Infinite-Build\", \"type\": \"Technology Feature\"},\n {\"name\": \"Continuous Build\", \"type\": \"Technology Feature\"},\n {\"name\": \"Concept Verification\", \"type\": \"Use Case\"},\n {\"name\": \"Design Validation\", \"type\": \"Use Case\"},\n {\"name\": \"Functional Performance\", \"type\": \"Use Case\"},\n {\"name\": \"Jigs & Fixtures\", \"type\": \"Use Case\"},\n {\"name\": \"Rapid Prototyping\", \"type\": \"Use Case\"},\n {\"name\": \"Motocross helmet air vent\", \"type\": \"Application Example\"},\n {\"name\": \"Smart home switch housing\", \"type\": \"Application Example\"},\n {\"name\": \"Mechanical iris\", \"type\": \"Application Example\"},\n {\"name\": \"1988\", \"type\": \"Year\"}\n ],\n \"edges\": [\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"FDM\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"PolyJet\"},\n {\"subject\": \"Stratasys\", 
\"predicate\": \"manufactures\", \"object\": \"F123 series\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"F170\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"F270\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"F370\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"Fortus 450MC\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"Fortus 380CF\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"J750\"},\n {\"subject\": \"F123 series\", \"predicate\": \"includes\", \"object\": \"F170\"},\n {\"subject\": \"F123 series\", \"predicate\": \"includes\", \"object\": \"F270\"},\n {\"subject\": \"F123 series\", \"predicate\": \"includes\", \"object\": \"F370\"},\n {\"subject\": \"F123 series\", \"predicate\": \"uses technology\", \"object\": \"FDM\"},\n {\"subject\": \"F170\", \"predicate\": \"has build size\", \"object\": \"10x10x10 inches\"},\n {\"subject\": \"F270\", \"predicate\": \"has build size\", \"object\": \"12x10x12 inches\"},\n {\"subject\": \"F370\", \"predicate\": \"has build size\", \"object\": \"14x10x14 inches\"},\n {\"subject\": \"F123 series\", \"predicate\": \"supports materials\", \"object\": \"ABS\"},\n {\"subject\": \"F123 series\", \"predicate\": \"supports materials\", \"object\": \"PLA\"},\n {\"subject\": \"F123 series\", \"predicate\": \"supports materials\", \"object\": \"ASA\"},\n {\"subject\": \"F370\", \"predicate\": \"supports materials\", \"object\": \"PC-ABS\"},\n {\"subject\": \"F123 series\", \"predicate\": \"uses software\", \"object\": \"GrabCAD Print\"},\n {\"subject\": \"J750\", \"predicate\": \"compatible with\", \"object\": \"GrabCAD Print\"},\n {\"subject\": \"F123 series\", \"predicate\": \"uses software\", \"object\": \"Insight\"},\n {\"subject\": \"F123 series\", \"predicate\": \"compatible with\", \"object\": \"SCA-3600 Cleaning 
Station\"},\n {\"subject\": \"F123 series\", \"predicate\": \"compatible with\", \"object\": \"SCA 1200 Cleaning Station\"},\n {\"subject\": \"FDM\", \"predicate\": \"founded by\", \"object\": \"Stratasys\"},\n {\"subject\": \"FDM\", \"predicate\": \"founded in\", \"object\": \"1988\"},\n {\"subject\": \"PolyJet\", \"predicate\": \"evolved from\", \"object\": \"1 material to 6 materials\"},\n {\"subject\": \"F170\", \"predicate\": \"designed for\", \"object\": \"Schools and professionals\"},\n {\"sub",
"error": null
},
"condition_b": {
"local_latency_s": 84.99,
"local_candidates": [
"Stratasys",
"FDM",
"PolyJet",
"YouTube",
"Industrial Manufacturing",
"Strategy",
"F123 series",
"Fortus 450MC",
"J750",
"GrabCAD Print",
"ABS",
"PLA",
"ASA",
"PC-ABS",
"QSR Support",
"SCA-3600 Cleaning Station",
"SCA 1200 Cleaning Station",
"STREAMLINED DESIGN-TO-3D PRINT"
],
"local_raw": "{\n \"candidates\": [\n \"Stratasys\",\n \"FDM\",\n \"PolyJet\",\n \"YouTube\",\n \"Industrial Manufacturing\",\n \"Strategy\",\n \"F123 series\",\n \"Fortus 450MC\",\n \"J750\",\n \"GrabCAD Print\",\n \"ABS\",\n \"PLA\",\n \"ASA\",\n \"PC-ABS\",\n \"QSR Support\",\n \"SCA-3600 Cleaning Station\",\n \"SCA 1200 Cleaning Station\",\n \"STREAMLINED DESIGN-TO-3D PRINT\"\n ]\n}",
"api_input_tokens": 2358,
"api_output_tokens": 1391,
"api_latency_s": 7.11,
"entity_count": 24,
"edge_count": 35,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Stratasys\", \"type\": \"Company\"},\n {\"name\": \"FDM\", \"type\": \"Technology\"},\n {\"name\": \"PolyJet\", \"type\": \"Technology\"},\n {\"name\": \"YouTube\", \"type\": \"Platform\"},\n {\"name\": \"Industrial Manufacturing\", \"type\": \"Industry\"},\n {\"name\": \"Strategy\", \"type\": \"Concept\"},\n {\"name\": \"F123 series\", \"type\": \"Printer Model\"},\n {\"name\": \"F170\", \"type\": \"Printer Model\"},\n {\"name\": \"F270\", \"type\": \"Printer Model\"},\n {\"name\": \"F370\", \"type\": \"Printer Model\"},\n {\"name\": \"Fortus 450MC\", \"type\": \"Printer Model\"},\n {\"name\": \"Fortus 380CF\", \"type\": \"Printer Model\"},\n {\"name\": \"J750\", \"type\": \"Printer Model\"},\n {\"name\": \"GrabCAD Print\", \"type\": \"Software\"},\n {\"name\": \"ABS\", \"type\": \"Material\"},\n {\"name\": \"PLA\", \"type\": \"Material\"},\n {\"name\": \"ASA\", \"type\": \"Material\"},\n {\"name\": \"PC-ABS\", \"type\": \"Material\"},\n {\"name\": \"QSR Support\", \"type\": \"Support Material\"},\n {\"name\": \"SCA-3600 Cleaning Station\", \"type\": \"Equipment\"},\n {\"name\": \"SCA 1200 Cleaning Station\", \"type\": \"Equipment\"},\n {\"name\": \"Insight\", \"type\": \"Software\"},\n {\"name\": \"Infinite-Build\", \"type\": \"Technology Feature\"},\n {\"name\": \"Continuous Build\", \"type\": \"Technology Feature\"}\n ],\n \"edges\": [\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"FDM\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"PolyJet\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"F123 series\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"Fortus 450MC\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"Fortus 380CF\"},\n {\"subject\": \"Stratasys\", \"predicate\": \"manufactures\", \"object\": \"J750\"},\n {\"subject\": \"F123 
series\", \"predicate\": \"includes models\", \"object\": \"F170\"},\n {\"subject\": \"F123 series\", \"predicate\": \"includes models\", \"object\": \"F270\"},\n {\"subject\": \"F123 series\", \"predicate\": \"includes models\", \"object\": \"F370\"},\n {\"subject\": \"F123 series\", \"predicate\": \"uses technology\", \"object\": \"FDM\"},\n {\"subject\": \"F170\", \"predicate\": \"supports materials\", \"object\": \"ABS\"},\n {\"subject\": \"F170\", \"predicate\": \"supports materials\", \"object\": \"PLA\"},\n {\"subject\": \"F170\", \"predicate\": \"supports materials\", \"object\": \"ASA\"},\n {\"subject\": \"F270\", \"predicate\": \"supports materials\", \"object\": \"ABS\"},\n {\"subject\": \"F270\", \"predicate\": \"supports materials\", \"object\": \"PLA\"},\n {\"subject\": \"F270\", \"predicate\": \"supports materials\", \"object\": \"ASA\"},\n {\"subject\": \"F370\", \"predicate\": \"supports materials\", \"object\": \"ABS\"},\n {\"subject\": \"F370\", \"predicate\": \"supports materials\", \"object\": \"PLA\"},\n {\"subject\": \"F370\", \"predicate\": \"supports materials\", \"object\": \"ASA\"},\n {\"subject\": \"F370\", \"predicate\": \"supports materials\", \"object\": \"PC-ABS\"},\n {\"subject\": \"F123 series\", \"predicate\": \"uses software\", \"object\": \"GrabCAD Print\"},\n {\"subject\": \"F123 series\", \"predicate\": \"compatible with cleaning station\", \"object\": \"SCA-3600 Cleaning Station\"},\n {\"subject\": \"F123 series\", \"predicate\": \"compatible with cleaning station\", \"object\": \"SCA 1200 Cleaning Station\"},\n {\"subject\": \"F123 series\", \"predicate\": \"supports\", \"object\": \"QSR Support\"},\n {\"subject\": \"J750\", \"predicate\": \"compatible with software\", \"object\": \"GrabCAD Print\"},\n {\"subject\": \"GrabCAD Print\", \"predicate\": \"connects to\", \"object\": \"Stratasys\"},\n {\"subject\": \"Insight\", \"predicate\": \"optimizes\", \"object\": \"F123 series\"},\n {\"subject\": \"FDM\", \"predicate\": 
\"founded by\", \"object\": \"Stratasys\"},\n {\"subject\": \"Industrial Manufacturing\", \"predicate\": \"requires\", \"object\": \"Strategy\"},\n {\"subject\": \"F123 series\", \"predicate\": \"used for\", \"object\": \"Rapid Prototyping\"},\n {\"subject\": \"F123 series\", \"predicate\": \"used for\", \"object\": \"Concept Verification\"},\n {\"subject\": \"F123 series\", \"predicate\": \"used for\", \"object\": \"Design Validation\"},\n {\"subject\": \"F123 series\", \"predicate\": \"used for\",",
"error": null
}
},
{
"source": "06_Gumball.docx",
"size_bucket": "medium",
"doc_chars_original": 1980,
"doc_chars_sent": 1980,
"truncated": false,
"condition_a": {
"input_tokens": 652,
"output_tokens": 1437,
"latency_s": 6.71,
"entity_count": 34,
"edge_count": 29,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Gumball\", \"type\": \"Tool\"},\n {\"name\": \"Widget\", \"type\": \"Component\"},\n {\"name\": \"Selected object\", \"type\": \"Entity\"},\n {\"name\": \"Move transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Scale transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Rotate transformation\", \"type\": \"Transformation\"},\n {\"name\": \"Gumball origin\", \"type\": \"Reference point\"},\n {\"name\": \"Gumball menu\", \"type\": \"Interface\"},\n {\"name\": \"White menu ball\", \"type\": \"UI element\"},\n {\"name\": \"Relocate Gumball\", \"type\": \"Menu option\"},\n {\"name\": \"Gumball center\", \"type\": \"Reference point\"},\n {\"name\": \"Reset Gumball\", \"type\": \"Menu option\"},\n {\"name\": \"Area centroid\", \"type\": \"Reference point\"},\n {\"name\": \"Align to CPlane\", \"type\": \"Menu option\"},\n {\"name\": \"Align to Object\", \"type\": \"Menu option\"},\n {\"name\": \"Align to World\", \"type\": \"Menu option\"},\n {\"name\": \"CPlane\", \"type\": \"Coordinate system\"},\n {\"name\": \"X axis\", \"type\": \"Axis\"},\n {\"name\": \"Y axis\", \"type\": \"Axis\"},\n {\"name\": \"Z axis\", \"type\": \"Axis\"},\n {\"name\": \"U curve\", \"type\": \"Object property\"},\n {\"name\": \"V curve\", \"type\": \"Object property\"},\n {\"name\": \"World grid system\", \"type\": \"Coordinate system\"},\n {\"name\": \"Snappy dragging\", \"type\": \"Dragging mode\"},\n {\"name\": \"Smooth dragging\", \"type\": \"Dragging mode\"},\n {\"name\": \"Osnaps\", \"type\": \"Feature\"},\n {\"name\": \"Drag strength\", \"type\": \"Parameter\"},\n {\"name\": \"Gumball elements\", \"type\": \"Component\"},\n {\"name\": \"Red color\", \"type\": \"Color\"},\n {\"name\": \"Green color\", \"type\": \"Color\"},\n {\"name\": \"Blue color\", \"type\": \"Color\"},\n {\"name\": \"Dotted lines\", \"type\": \"Visual element\"},\n {\"name\": \"Arrow\", \"type\": \"Visual element\"},\n {\"name\": 
\"Box\", \"type\": \"Visual element\"}\n ],\n \"edges\": [\n {\"subject\": \"Gumball\", \"predicate\": \"is a\", \"object\": \"Widget\"},\n {\"subject\": \"Gumball\", \"predicate\": \"is used to facilitate\", \"object\": \"Direct editing\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Move transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Scale transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Rotate transformation\"},\n {\"subject\": \"Gumball\", \"predicate\": \"operates around\", \"object\": \"Gumball origin\"},\n {\"subject\": \"Gumball menu\", \"predicate\": \"is accessed by\", \"object\": \"White menu ball\"},\n {\"subject\": \"Relocate Gumball\", \"predicate\": \"allows redefinition of\", \"object\": \"Gumball center\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Scaling\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Translating\"},\n {\"subject\": \"Gumball center\", \"predicate\": \"is the origin point for\", \"object\": \"Rotating\"},\n {\"subject\": \"Reset Gumball\", \"predicate\": \"places gumball at\", \"object\": \"Area centroid\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"X axis\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"Y axis\"},\n {\"subject\": \"Align to CPlane\", \"predicate\": \"makes arrows point in direction of\", \"object\": \"Z axis\"},\n {\"subject\": \"Align to Object\", \"predicate\": \"relates gumball to\", \"object\": \"U curve\"},\n {\"subject\": \"Align to Object\", \"predicate\": \"relates gumball to\", \"object\": \"V curve\"},\n {\"subject\": \"Align to World\", \"predicate\": \"relates gumball to\", \"object\": \"World grid system\"},\n {\"subject\": \"Smooth dragging\", \"predicate\": 
\"allows dragging without\", \"object\": \"Restraints\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"is equivalent to\", \"object\": \"Osnaps\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"causes object to snap to\", \"object\": \"End point\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"causes object to snap to\", \"object\": \"Mid point\"},\n {\"subject\": \"Drag strength\", \"predicate\": \"dictates how\", \"object\": \"Mouse mov",
"error": null
},
"condition_b": {
"local_latency_s": 19.62,
"local_candidates": [
"Gumball",
"Menu",
"White menu ball",
"Relocate Gumball",
"Origin point",
"Scaling",
"Translating",
"Rotating",
"Reset Gumball",
"Area centroid",
"CPlane",
"Object",
"World",
"Snappy draging",
"Smooth draging",
"Osnaps",
"Illustraitor",
"Drag Strength",
"Red",
"Green",
"Blue"
],
"local_raw": "{\n \"candidates\": [\"Gumball\", \"Menu\", \"White menu ball\", \"Relocate Gumball\", \"Origin point\", \"Scaling\", \"Translating\", \"Rotating\", \"Reset Gumball\", \"Area centroid\", \"CPlane\", \"Object\", \"World\", \"Snappy draging\", \"Smooth draging\", \"Osnaps\", \"Illustraitor\", \"Drag Strength\", \"Red\", \"Green\", \"Blue\"]\n}",
"api_input_tokens": 792,
"api_output_tokens": 1050,
"api_latency_s": 5.15,
"entity_count": 24,
"edge_count": 23,
"stop_reason": "end_turn",
"response_text": "```json\n{\n \"entities\": [\n {\"name\": \"Gumball\", \"type\": \"Widget\"},\n {\"name\": \"Menu\", \"type\": \"UI Component\"},\n {\"name\": \"White menu ball\", \"type\": \"UI Element\"},\n {\"name\": \"Relocate Gumball\", \"type\": \"Function\"},\n {\"name\": \"Origin point\", \"type\": \"Concept\"},\n {\"name\": \"Scaling\", \"type\": \"Transformation\"},\n {\"name\": \"Translating\", \"type\": \"Transformation\"},\n {\"name\": \"Rotating\", \"type\": \"Transformation\"},\n {\"name\": \"Reset Gumball\", \"type\": \"Function\"},\n {\"name\": \"Area centroid\", \"type\": \"Concept\"},\n {\"name\": \"CPlane\", \"type\": \"Alignment Option\"},\n {\"name\": \"Object\", \"type\": \"Alignment Option\"},\n {\"name\": \"World\", \"type\": \"Alignment Option\"},\n {\"name\": \"Snappy dragging\", \"type\": \"Dragging Mode\"},\n {\"name\": \"Smooth dragging\", \"type\": \"Dragging Mode\"},\n {\"name\": \"Osnaps\", \"type\": \"Feature\"},\n {\"name\": \"Drag Strength\", \"type\": \"Parameter\"},\n {\"name\": \"Red\", \"type\": \"Color\"},\n {\"name\": \"Green\", \"type\": \"Color\"},\n {\"name\": \"Blue\", \"type\": \"Color\"},\n {\"name\": \"X axis\", \"type\": \"Axis\"},\n {\"name\": \"Y axis\", \"type\": \"Axis\"},\n {\"name\": \"Z axis\", \"type\": \"Axis\"},\n {\"name\": \"Illustrator\", \"type\": \"Software\"}\n ],\n \"edges\": [\n {\"subject\": \"Gumball\", \"predicate\": \"is a widget used to facilitate\", \"object\": \"direct editing\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Scaling\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Translating\"},\n {\"subject\": \"Gumball\", \"predicate\": \"provides\", \"object\": \"Rotating\"},\n {\"subject\": \"White menu ball\", \"predicate\": \"opens\", \"object\": \"Menu\"},\n {\"subject\": \"Relocate Gumball\", \"predicate\": \"redefines\", \"object\": \"Origin point\"},\n {\"subject\": \"Origin point\", \"predicate\": \"is the center for\", 
\"object\": \"Scaling\"},\n {\"subject\": \"Origin point\", \"predicate\": \"is the center for\", \"object\": \"Translating\"},\n {\"subject\": \"Origin point\", \"predicate\": \"is the center for\", \"object\": \"Rotating\"},\n {\"subject\": \"Reset Gumball\", \"predicate\": \"places gumball at\", \"object\": \"Area centroid\"},\n {\"subject\": \"Gumball\", \"predicate\": \"can align to\", \"object\": \"CPlane\"},\n {\"subject\": \"Gumball\", \"predicate\": \"can align to\", \"object\": \"Object\"},\n {\"subject\": \"Gumball\", \"predicate\": \"can align to\", \"object\": \"World\"},\n {\"subject\": \"CPlane alignment\", \"predicate\": \"points arrows in direction of\", \"object\": \"X axis\"},\n {\"subject\": \"CPlane alignment\", \"predicate\": \"points arrows in direction of\", \"object\": \"Y axis\"},\n {\"subject\": \"CPlane alignment\", \"predicate\": \"points arrows in direction of\", \"object\": \"Z axis\"},\n {\"subject\": \"Smooth dragging\", \"predicate\": \"allows dragging without\", \"object\": \"restraints\"},\n {\"subject\": \"Snappy dragging\", \"predicate\": \"is equivalent to turning on\", \"object\": \"Osnaps\"},\n {\"subject\": \"Drag Strength\", \"predicate\": \"dictates how mouse movement affects\", \"object\": \"object\"},\n {\"subject\": \"Red\", \"predicate\": \"is assigned to\", \"object\": \"X axis\"},\n {\"subject\": \"Green\", \"predicate\": \"is assigned to\", \"object\": \"Y axis\"},\n {\"subject\": \"Blue\", \"predicate\": \"is assigned to\", \"object\": \"Z axis\"},\n {\"subject\": \"Blue element\", \"predicate\": \"alters object in\", \"object\": \"Z axis\"}\n ]\n}\n```",
"error": null
}
}
]
}
+332
View File
@@ -0,0 +1,332 @@
[
{
"name": "ChatGPT: Provide DXF file",
"binary": "single",
"score": 5,
"note": null,
"bucket": "low",
"delta_preds": 4,
"delta_edges": 11,
"prod_preds": 5,
"cascade_preds": 9
},
{
"name": "Claude: Internship agreement writing help",
"binary": "single",
"score": 5,
"note": null,
"bucket": "mid",
"delta_preds": 0,
"delta_edges": 1,
"prod_preds": 6,
"cascade_preds": 6
},
{
"name": "Claude: Interview presentation research and preparation",
"binary": "multi",
"score": 3,
"note": "i would say that this is edu and prof",
"bucket": "mid",
"delta_preds": -4,
"delta_edges": 0,
"prod_preds": 9,
"cascade_preds": 5
},
{
"name": "Circuits II.pptx",
"binary": "single",
"score": 5,
"note": null,
"bucket": "document",
"delta_preds": 2,
"delta_edges": 5,
"prod_preds": 5,
"cascade_preds": 7
},
{
"name": "ChatGPT: Scholarship Recommendation Letter Tips",
"binary": "single",
"score": 5,
"note": null,
"bucket": "low",
"delta_preds": -1,
"delta_edges": -1,
"prod_preds": 2,
"cascade_preds": 1
},
{
"name": "ChatGPT: SEC coaches with OSU ties",
"binary": "single",
"score": 5,
"note": null,
"bucket": "high",
"delta_preds": -1,
"delta_edges": -1,
"prod_preds": 5,
"cascade_preds": 4
},
{
"name": "ChatGPT: Tulsa Concept Album Guide",
"binary": "single",
"score": 5,
"note": null,
"bucket": "high",
"delta_preds": -6,
"delta_edges": -3,
"prod_preds": 10,
"cascade_preds": 4
},
{
"name": "Claude: I filling out my annual report...",
"binary": "multi",
"score": 3,
"note": "edu and prof",
"bucket": "mid",
"delta_preds": 1,
"delta_edges": 3,
"prod_preds": 6,
"cascade_preds": 7
},
{
"name": "ChatGPT: Remington 700 5R Gen 1",
"binary": "multi",
"score": 3,
"note": "edu and personal",
"bucket": "mid",
"delta_preds": 1,
"delta_edges": 0,
"prod_preds": 7,
"cascade_preds": 8
},
{
"name": "ChatGPT: Research Statement Restructure",
"binary": "multi",
"score": 3,
"note": "edu and prof and personal maybe depending on how you see a job search",
"bucket": "mid",
"delta_preds": -2,
"delta_edges": 3,
"prod_preds": 4,
"cascade_preds": 2
},
{
"name": "Claude: SUNY faculty conflict of interest policies",
"binary": "multi",
"score": 2,
"note": "this is edu also prof but might also be personal becuase its work outside of my normal job",
"bucket": "mid",
"delta_preds": -4,
"delta_edges": -7,
"prod_preds": 7,
"cascade_preds": 3
},
{
"name": "Nic Oconnor Field Work F2023 Syllabus.pdf",
"binary": "single",
"score": 5,
"note": null,
"bucket": "document",
"delta_preds": 9,
"delta_edges": 7,
"prod_preds": 5,
"cascade_preds": 14
},
{
"name": "New Mexico Cover Letter.docx",
"binary": "multi",
"score": 3,
"note": "and application letter to an EDU job fits in two domains",
"bucket": "document",
"delta_preds": 3,
"delta_edges": 8,
"prod_preds": 7,
"cascade_preds": 10
},
{
"name": "ChatGPT: Testing Vector Alignment",
"binary": "single",
"score": 5,
"note": null,
"bucket": "low",
"delta_preds": 0,
"delta_edges": 0,
"prod_preds": 3,
"cascade_preds": 3
},
{
"name": "ChatGPT: Sink Sprayer Fitting Name",
"binary": "single",
"score": 5,
"note": null,
"bucket": "low",
"delta_preds": 0,
"delta_edges": -1,
"prod_preds": 3,
"cascade_preds": 3
},
{
"name": "Polar Coordinates.pptx",
"binary": "multi",
"score": 3,
"note": "this could be both edu and professional",
"bucket": "document",
"delta_preds": 0,
"delta_edges": 0,
"prod_preds": 8,
"cascade_preds": 8
},
{
"name": "ChatGPT: Rhino 3D object flow",
"binary": "multi",
"score": 3,
"note": "this is the same its edu but part of my profession",
"bucket": "high",
"delta_preds": -5,
"delta_edges": -3,
"prod_preds": 16,
"cascade_preds": 11
},
{
"name": "Aaron AI: So, I've been working on the RNAI project, and the way I've ...",
"binary": "multi",
"score": 3,
"note": "technical but apersonal project",
"bucket": "mid",
"delta_preds": 1,
"delta_edges": 2,
"prod_preds": 5,
"cascade_preds": 6
},
{
"name": "ChatGPT: Regex for inserting letters",
"binary": "single",
"score": 5,
"note": "this is a technical question",
"bucket": "low",
"delta_preds": 0,
"delta_edges": 0,
"prod_preds": 4,
"cascade_preds": 4
},
{
"name": "Claude: Evaluating tenure prospects at R1 universities",
"binary": "multi",
"score": 3,
"note": "this to me is EDU and professional",
"bucket": "high",
"delta_preds": 12,
"delta_edges": 31,
"prod_preds": 3,
"cascade_preds": 15
},
{
"name": "ChatGPT: Title: User request summary.",
"binary": "multi",
"score": 3,
"note": "this is agina is edu and prfoessional",
"bucket": "low",
"delta_preds": -1,
"delta_edges": -1,
"prod_preds": 3,
"cascade_preds": 2
},
{
"name": "University of North Texas Cover letter.pdf",
"binary": "multi",
"score": 3,
"note": "edu and professional, im applying for a job i forgot i applied for",
"bucket": "document",
"delta_preds": -2,
"delta_edges": -1,
"prod_preds": 10,
"cascade_preds": 8
},
{
"name": "ChatGPT: Resume formatting and review",
"binary": "multi",
"score": 3,
"note": "personal and professional, im trying to get anotehr job",
"bucket": "high",
"delta_preds": 1,
"delta_edges": 3,
"prod_preds": 8,
"cascade_preds": 9
},
{
"name": "Nic Oconnor Ind Study S2024 Syllabus.docx",
"binary": "single",
"score": 5,
"note": null,
"bucket": "document",
"delta_preds": 2,
"delta_edges": 1,
"prod_preds": 6,
"cascade_preds": 8
},
{
"name": "Claude: Lubbock on everything album lyrics",
"binary": "single",
"score": 5,
"note": "personal too tho",
"bucket": "high",
"delta_preds": -7,
"delta_edges": -12,
"prod_preds": 11,
"cascade_preds": 4
},
{
"name": "Claude: Bonding ASA 3D printed parts",
"binary": "multi",
"score": 3,
"note": "edu and professional",
"bucket": "mid",
"delta_preds": -1,
"delta_edges": 1,
"prod_preds": 5,
"cascade_preds": 4
},
{
"name": "ChatGPT: Respect Individual Interests for Christmas",
"binary": "single",
"score": 5,
"note": "personal to me not edu",
"bucket": "low",
"delta_preds": -2,
"delta_edges": -2,
"prod_preds": 3,
"cascade_preds": 1
},
{
"name": "Claude: Finding ideal rural housing near University of Utah",
"binary": "multi",
"score": 3,
"note": "personal too where do i live?",
"bucket": "high",
"delta_preds": -1,
"delta_edges": 2,
"prod_preds": 14,
"cascade_preds": 13
},
{
"name": "Claude: Law enforcement career options",
"binary": "multi",
"score": 3,
"note": "edu and perfessional, where do i work?",
"bucket": "high",
"delta_preds": 1,
"delta_edges": -2,
"prod_preds": 18,
"cascade_preds": 19
},
{
"name": "ChatGPT: Rectangle Edge Offset Algorithm",
"binary": "single",
"score": 5,
"note": "this is me asking for technical help",
"bucket": "low",
"delta_preds": 0,
"delta_edges": 3,
"prod_preds": 2,
"cascade_preds": 2
}
]
+216
View File
@@ -0,0 +1,216 @@
{
"ratings": [
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: Provide DXF file"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "Claude: Internship agreement writing help"
},
{
"binary": "multi",
"score": 3,
"note": "i would say that this is edu and prof",
"name": "Claude: Interview presentation research and preparation"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "Circuits II.pptx"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: Scholarship Recommendation Letter Tips"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: SEC coaches with OSU ties"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: Tulsa Concept Album Guide"
},
{
"binary": "multi",
"score": 3,
"note": "edu and prof",
"name": "Claude: I filling out my annual report..."
},
{
"binary": "multi",
"score": 3,
"note": "edu and personal",
"name": "ChatGPT: Remington 700 5R Gen 1"
},
{
"binary": "multi",
"score": 3,
"note": "edu and prof and personal maybe depending on how you see a job search",
"name": "ChatGPT: Research Statement Restructure"
},
{
"binary": "multi",
"score": 2,
"note": "this is edu also prof but might also be personal becuase its work outside of my normal job",
"name": "Claude: SUNY faculty conflict of interest policies"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "Nic Oconnor Field Work F2023 Syllabus.pdf"
},
{
"binary": "multi",
"score": 3,
"note": "and application letter to an EDU job fits in two domains",
"name": "New Mexico Cover Letter.docx"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: Testing Vector Alignment"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "ChatGPT: Sink Sprayer Fitting Name"
},
{
"binary": "multi",
"score": 3,
"note": "this could be both edu and professional",
"name": "Polar Coordinates.pptx"
},
{
"binary": "multi",
"score": 3,
"note": "this is the same its edu but part of my profession",
"name": "ChatGPT: Rhino 3D object flow"
},
{
"binary": "multi",
"score": 3,
"note": "technical but apersonal project",
"name": "Aaron AI: So, I've been working on the RNAI project, and the way I've ..."
},
{
"binary": "single",
"score": 5,
"note": "this is a technical question",
"name": "ChatGPT: Regex for inserting letters"
},
{
"binary": "multi",
"score": 3,
"note": "this to me is EDU and professional",
"name": "Claude: Evaluating tenure prospects at R1 universities"
},
{
"binary": "multi",
"score": 3,
"note": "this is agina is edu and prfoessional",
"name": "ChatGPT: Title: User request summary."
},
{
"binary": "multi",
"score": 3,
"note": "edu and professional, im applying for a job i forgot i applied for",
"name": "University of North Texas Cover letter.pdf"
},
{
"binary": "multi",
"score": 3,
"note": "personal and professional, im trying to get anotehr job",
"name": "ChatGPT: Resume formatting and review"
},
{
"binary": "single",
"score": 5,
"note": null,
"name": "Nic Oconnor Ind Study S2024 Syllabus.docx"
},
{
"binary": "single",
"score": 5,
"note": "personal too tho",
"name": "Claude: Lubbock on everything album lyrics"
},
{
"binary": "multi",
"score": 3,
"note": "edu and professional",
"name": "Claude: Bonding ASA 3D printed parts"
},
{
"binary": "single",
"score": 5,
"note": "personal to me not edu",
"name": "ChatGPT: Respect Individual Interests for Christmas"
},
{
"binary": "multi",
"score": 3,
"note": "personal too where do i live?",
"name": "Claude: Finding ideal rural housing near University of Utah"
},
{
"binary": "multi",
"score": 3,
"note": "edu and perfessional, where do i work?",
"name": "Claude: Law enforcement career options"
},
{
"binary": "single",
"score": 5,
"note": "this is me asking for technical help",
"name": "ChatGPT: Rectangle Edge Offset Algorithm"
}
],
"completed_names": [
"ChatGPT: Provide DXF file",
"Claude: Internship agreement writing help",
"Claude: Interview presentation research and preparation",
"Circuits II.pptx",
"ChatGPT: Scholarship Recommendation Letter Tips",
"ChatGPT: SEC coaches with OSU ties",
"ChatGPT: Tulsa Concept Album Guide",
"Claude: I filling out my annual report...",
"ChatGPT: Remington 700 5R Gen 1",
"ChatGPT: Research Statement Restructure",
"Claude: SUNY faculty conflict of interest policies",
"Nic Oconnor Field Work F2023 Syllabus.pdf",
"New Mexico Cover Letter.docx",
"ChatGPT: Testing Vector Alignment",
"ChatGPT: Sink Sprayer Fitting Name",
"Polar Coordinates.pptx",
"ChatGPT: Rhino 3D object flow",
"Aaron AI: So, I've been working on the RNAI project, and the way I've ...",
"ChatGPT: Regex for inserting letters",
"Claude: Evaluating tenure prospects at R1 universities",
"ChatGPT: Title: User request summary.",
"University of North Texas Cover letter.pdf",
"ChatGPT: Resume formatting and review",
"Nic Oconnor Ind Study S2024 Syllabus.docx",
"Claude: Lubbock on everything album lyrics",
"Claude: Bonding ASA 3D printed parts",
"ChatGPT: Respect Individual Interests for Christmas",
"Claude: Finding ideal rural housing near University of Utah",
"Claude: Law enforcement career options",
"ChatGPT: Rectangle Edge Offset Algorithm"
]
}
View File
+281
View File
@@ -0,0 +1,281 @@
{
"validation_rate": 48.148148148148145,
"correct": 13,
"total": 27,
"thresholds": {
"high": 0.7,
"low": 0.4
},
"results": [
{
"name": "Claude: Lubbock on everything album lyrics",
"subsample": "a",
"bucket": "high",
"frame_relationships": "The Songwriting frame is the primary focus of this content, with the Storytelling and Literary Analysis frames providing context and analysis for understanding Terry Allen's writing style.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "ChatGPT: Tulsa Concept Album Guide",
"subsample": "a",
"bucket": "high",
"frame_relationships": "The Personal_Narrative and Place_Exploration frames are intertwined, with the album being a personal account of the writer's experiences in Tulsa. The Autobiography frame is used to disguise the concept album as a traditional autobiography.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Rhino 3D object flow",
"subsample": "a",
"bucket": "high",
"frame_relationships": "The philosophical concepts provide a framework for understanding and describing digital fabrication processes.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "Claude: SUNY faculty conflict of interest policies",
"subsample": "a",
"bucket": "mid",
"frame_relationships": "The document primarily discusses the SUNY conflict of interest policy for faculty members, focusing on outside work and consulting. The system-level policy and specific campus handbooks are provided as resources.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "Claude: Interview presentation research and preparation",
"subsample": "a",
"bucket": "mid",
"frame_relationships": "The Digital-Physical Interface frame centers the workflow between digital models and physical objects, while Emerging Technologies highlights the specific tools (AI, XR) used in this workflow.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "ChatGPT: Respect Individual Interests for Christmas",
"subsample": "a",
"bucket": "low",
"frame_relationships": "The frames of gift giving and interests/preferences are intertwined as the document discusses choosing gifts based on a recipient's unique hobbies and preferences during the holiday season.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "University of North Texas Cover letter.pdf",
"subsample": "a",
"bucket": "document",
"frame_relationships": "The 'Education' and 'Job Application' frames are the primary drivers of the document, with 'Artwork Creation' providing contextual details. The 'Academia' frame serves as a broader context for all other frames.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "Claude: Finding ideal rural housing near University of Utah",
"subsample": "a",
"bucket": "high",
"frame_relationships": "The geographical location and community characteristics inform the housing and transportation options in each area.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: SEC coaches with OSU ties",
"subsample": "a",
"bucket": "high",
"frame_relationships": "",
"coverage_score": 0.0,
"predicted_tier": "taxfree",
"actual_best": "baseline",
"match": false
},
{
"name": "Claude: Bonding ASA 3D printed parts",
"subsample": "a",
"bucket": "mid",
"frame_relationships": "The frames 'Bonding' and 'Material_Selection' are active, with the latter providing context for the former by discussing suitable adhesives for bonding 3D printed parts.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Title: User request summary.",
"subsample": "a",
"bucket": "low",
"frame_relationships": "The 'Event' frame is the main context, within which the 'Speech', 'Hosting', and 'Communication' frames are active.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Scholarship Recommendation Letter Tips",
"subsample": "a",
"bucket": "low",
"frame_relationships": "The frames interact to provide a comprehensive assessment of the candidate's qualifications for a scholarship, focusing on both their academic and personal qualities.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "ChatGPT: Job application comparison",
"subsample": "b",
"bucket": "high",
"frame_relationships": "The frames interact by comparing the applicant's qualifications to the job requirements, peer deans' profiles, and the mission of the School of the Arts at Western.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "standard",
"match": false
},
{
"name": "ChatGPT: External review for tenure",
"subsample": "b",
"bucket": "high",
"frame_relationships": "The Tenure Review frame is the central and overarching context in which the other frames (Academic Evaluation, Peer Review) are activated.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "standard",
"match": false
},
{
"name": "Claude: University of Utah interview teaching example",
"subsample": "b",
"bucket": "high",
"frame_relationships": "The document discusses a university course, its curriculum, and related projects; these topics are interconnected as they all revolve around the teaching of computational design in a university setting.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Starting Dropship Gun Business",
"subsample": "b",
"bucket": "high",
"frame_relationships": "The 'GunBusinessSetup' frame involves navigating multiple layers of compliance (FFL, zoning, and state/local laws) while building relationships with distributors and setting up an e-commerce platform.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Analyze business plan",
"subsample": "b",
"bucket": "high",
"frame_relationships": "",
"coverage_score": 0.0,
"predicted_tier": "taxfree",
"actual_best": "taxfree",
"match": true
},
{
"name": "ChatGPT: Outdoor Layering Explained",
"subsample": "b",
"bucket": "mid",
"frame_relationships": "The frames interact by defining layers in an outdoor clothing system for managing sweat, insulation, and protection from weather conditions.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Limits in Calculus.",
"subsample": "b",
"bucket": "mid",
"frame_relationships": "The 'Mathematics' and 'Calculus' frames are active. The 'Functions', 'Limits', and 'Calculus' frames are interconnected, as the document discusses the concept of limits in relation to functions within calculus.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "ChatGPT: Academic Program Director Role",
"subsample": "b",
"bucket": "mid",
"frame_relationships": "The 'academic program' frame underpins the entire document, with 'administration', 'faculty management', and 'student engagement' frames supporting it. The 'curriculum development' frame is a core function that intersects all other frames.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "ChatGPT: Lonely Island Poop Skit",
"subsample": "b",
"bucket": "mid",
"frame_relationships": "The Music frame is used to deliver comedic content within the Humor frame.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "standard",
"match": false
},
{
"name": "ChatGPT: Parse Tidal playlist",
"subsample": "b",
"bucket": "mid",
"frame_relationships": "",
"coverage_score": 0.0,
"predicted_tier": "taxfree",
"actual_best": "taxfree",
"match": true
},
{
"name": "NO thesis proposal.pdf",
"subsample": "b",
"bucket": "document",
"frame_relationships": "The artist's career progression is intertwined with their identity and personal interests, shaping the subject matter and style of their product designs.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "PWM.pdf",
"subsample": "b",
"bucket": "document",
"frame_relationships": "The PWM and Analog to Digital Conversion frames are directly related as they both discuss the principles and applications of PWM in relation to analog-to-digital conversion, while the Arduino ADC frame provides specific context regarding the use of these concepts with an Arduino microcontroller.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "Will_It_Print.pdf",
"subsample": "b",
"bucket": "document",
"frame_relationships": "The document discusses the design and manufacturing of a part, focusing on its features, material removal, surface finish, support structures, overhangs, internal cavities, tolerances, and stress concentration.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "taxfree",
"match": false
},
{
"name": "Kim Kedem Ind Study F2025 Syllabus.docx",
"subsample": "b",
"bucket": "document",
"frame_relationships": "The Education frame serves as the context for the Project and Software Development frames, with the 3D Printing frame acting as a specific application of these skills.",
"coverage_score": 1.0,
"predicted_tier": "baseline",
"actual_best": "baseline",
"match": true
},
{
"name": "Aaron Nelson Graduate Transcript.pdf",
"subsample": "b",
"bucket": "document",
"frame_relationships": "",
"coverage_score": 0.0,
"predicted_tier": "taxfree",
"actual_best": "taxfree",
"match": true
}
]
}
+104
View File
@@ -0,0 +1,104 @@
{
"episodes": [
{
"name": "Claude: Modern website redesign with portfolio and contact form",
"content": "[Claude conversation: Modern website redesign with portfolio and contact form]\n\nYou: Q: Want to take some time to browse, or keep moving?\nA: I'll browse those and come back \u2014 let's pause here\n\nClaude: Sounds good, Aaron. Take your time clicking through those \u2014 especially the Architecture & Design community page and the templates. Pay attention to what makes you stop scrolling. When you come back, we can map out your full site architecture, figure out the page structure for both the academic and consulting audiences, and spec out that contact form. Have fun browsing.\n\nYou: oh god so ive notice that a lot of artist websites look like this and I just assumed it was an aesthetic choice but now Im seeing that all the cargo stites look similar [Claude conversation: Modern website redesign with portfolio and contact form]\n\nYou: oh god so ive notice that a lot of artist websites look like this and I just assumed it was an aesthetic choice but now Im seeing that all the cargo stites look similar\n\nClaude: Ha \u2014 yeah, you just pulled back the curtain. That \"clean grid, minimal typography, lots of white space, project thumbnails in a masonry layout\" look that's everywhere in the creative professional world isn't really an aesthetic movement. It's Cargo's design system. Once you see it, you can't unsee it.\n\nThis is actually a real consideration for you, and I want to be honest about it rather than hand-wave it away. On one hand, there's a reason that look is everywhere \u2014 it works, it's legible, it puts the work front and center. On the other hand, you're a designer. If your website looks like every MFA grad's portfolio site, that says something you probably don't want it to say. Especially when you're trying to position yourself as someone who builds things, who leads a fabrication lab, who works at the intersection of computation and manufacturing. 
That identity is distinct from the gallery-artist-with-a-Cargo-site world.\n\nYou *can* customize Cargo significantly beyond the defau",
"category": "recent_conversation"
},
{
"name": "Claude: University of Utah interview teaching example",
"content": "[Claude conversation: University of Utah interview teaching example]\n\nYou: I guess the next class would be 2 d computational design. It's a programming for the arts course. We teach primarily in processing in Java.\n\nClaude: Got it \u2014 2D Computational Design, programming for the arts, primarily in Processing/Java. What's next?\n\nYou: We teach an advanced 3 d printing course This goes through advanced slicer stuff. We go through usage of the Stratasys equipment, both, FDM and PolyJet. The students also have the option of taking a, Stratasys certification the additive manufacturing certification at the end of the course. [Claude conversation: University of Utah interview teaching example]\n\nYou: Can we have one with just text only and I can let the PP AI designer have a try\n\nClaude: Here's V4 \u2014 completely plain. No colors, no shapes, no accent bars, no styled cards. Just text on white slides with all the speaker notes intact. PowerPoint Designer should have a field day with this. You've got both versions now \u2014 V3 with my styling if you prefer it, V4 plain for Designer to take a crack at. Upload the finished product whenever you're ready and I'll parse it.\n\nYou: Shit I forgot about this, does this count to something?:\n\nhttps://www.aaronmakingart.com/15-twitter-printer/ [Claude conversation: University of Utah interview teaching example]\n\nYou: Shit I forgot about this, does this count to something?:\n\nhttps://www.aaronmakingart.com/15-twitter-printer/\n\nClaude: Yeah, this counts. It counts in a few ways:\n\n**For the presentation prompt specifically** \u2014 this is a project that combines physical computing (Raspberry Pi, thermal printer, 3D printed housing), custom Python code interfacing with an API, and critical engagement with technology's social impact. It's a designed object that makes an invisible digital phenomenon physically present. 
That's exactly the kind of work that bridges the technical and the critical \u2014 which is where MDD lives.\n\n**For the room you're presenting to*",
"category": "recent_conversation"
},
{
"name": "Claude: Preparing for dinner with Jim Agutter",
"content": "[Claude conversation: Preparing for dinner with Jim Agutter]\n\nYou: I\u2019m meeting with Jim for dinner tonight what can you tell me about him\n\nClaude: Jim Agutter is the founder of the Division of Multi-Disciplinary Design (MDD) at the University of Utah and chairs the search committee for the position you're interviewing for. Here's what's worth knowing heading into dinner:\n\n**Background:** He built MDD from the ground up, so he's likely to have strong opinions about the program's identity and direction. As both founder and search committee chair, his read on you will carry significant weight.\n\n**Research angle:** His work sits at the intersection of information visualization and design \u2014 he's done notable research in healthcare data visualization and complex systems display. There's natural overlap with your computational design work, particularly around data-driven design processes.\n\n**Conversation hooks:** Given that he founded a multi-disciplinary program, he'll probably respond well to hearing how you built HVAMC from scratch \u2014 the parallels are obvious (equipment, policy, client engagement, program growth). That shared experience of building something from nothing is a strong connector.\n\n**What to watch for:** As the founder, he may be protective of MDD's culture and trajectory. Listen carefully for what he signals about the program's future direction and where he sees gaps. 
His questions will likely reveal what he thinks the program needs most.\n\nWant me to search for any recent publications or projects of his, or anything else that might be useful before tonight?\n\nYou: Yes do a thourough search [Claude conversation: Preparing for dinner with Jim Agutter]\n\nYou: Yes do a thourough search\n\nClaude: Let me dig into Jim Agutter's recent work and background thoroughly.Here's the full picture on Jim Agutter going into tonight:\n\n**Current roles (he wears a lot of hats):**\nHis current title is Senior Associate Dean of Faculty Success & Academic Innovation in the Office of",
"category": "recent_conversation"
},
{
"name": "Claude: Realtor requirements for home buying in New York",
"content": "[Claude conversation: Realtor requirements for home buying in New York]\n\nYou: do I need to have a realtor in ny state to buy a home?\n\nClaude: No, Aaron \u2014 it's perfectly legal in all 50 states to buy a house without using a real estate agent on your end, and New York is no exception. With the necessary due diligence, you can buy a house without a Realtor in New York.\n\nThat said, there's one important distinction for New York specifically: in New York, you must use a real estate attorney to assist with the paperwork and legal aspects of the transaction. This is different from many other states \u2014 NY is one of the states that requires attorney involvement in real estate closings. So while a realtor is optional, a real estate attorney is not.\n\nA few things worth keeping in mind as you're thinking about this up in the Hudson Valley area:\n\n**What a buyer's agent does for you** \u2014 they handle property searches, negotiate on your behalf, and guide you through co-op/condo board processes if applicable. Most home sellers have an agent working on their side of the deal, so it's smart to have someone on your side as a buyer too.\n\n**The cost question** \u2014 buying without an agent is rarely cheaper, as the commission your realtor would collect will likely just be collected by the seller's agent instead. That said, since the 2024 NAR settlement changes, commission structures are more negotiable and buyers may need to negotiate their agent's fee directly.\n\n**If you skip the agent** \u2014 you'll want to lean more heavily on your real estate attorney and be prepared to do your own comparable market analysis, negotiate directly with the seller's agent, and manage the inspection/appraisal timeline yourself.\n\nGiven that you're already in the area and presumably know the local market reasonably well, going without could be viable \u2014 but a good attorney is non-negotiable either way. 
Want me to dig into anything more specific, like first-time buyer programs in NY or what to look for in a real estat",
"category": "recent_conversation"
},
{
"name": "Claude: Memory",
"content": "[Claude memory \u2014 what Claude has learned about Aaron]\n\n- {'conversations_memory': '**Work context**\\n\\nAaron Nelson is an Associate Professor of Digital Design & Fabrication and Program Director and Lead Researcher for the Hudson Valley Additive Manufacturing Center (HVAMC) at SUNY New Paltz, a facility he built from the ground up since 2016. He oversees the DDF minor (~50 students), a hybrid graduate program stacked with undergraduates, and industry partnerships with companies including IBM, Braskem, Selux, Sono-Tek, JBT, and Usheco. Aaron is actively weighing whether to remain at SUNY New Paltz \u2014 which faces structural budget deficits and program vulnerability risks \u2014 or pursue a major career transition, with a University of Utah faculty opportunity and an independent consultancy as the leading alternatives.\\n\\n**Personal context**\\n\\nAaron grew up in Tulsa from 1989 to 2001 and has a strong emotional pull back to the city and the broader Oklahoma/Rocky Mountain region. He has a merle border collie named Bird. He is working on a concept album about Tulsa \u2014 modeled on Terry Allen\\'s *Lubbock (on Everything)* \u2014 framed as an autobiography disguised as a breakup album with the city, drawing on his experience as a mixed-race person who felt rejected by Tulsa. 
He also maintains a range of design and illustration side projects (graffiti lettering, vector art, historical restoration work) and has interest in advanced geometric calculations relevant to fabrication contexts.\\n\\n**Top of mind**\\n\\nAaron has an on-campus interview at the University of Utah\\'s Division of Multi-Disciplinary Design (MDD) scheduled for the third week of April 2026, and interview prep is his primary active focus \u2014 including a concrete teaching example built around his Advanced 3D Printing chocolate casting project (PolyJet printing \u2192 silicone mold-making \u2192 food-safe casting \u2192 packaging and branding), which best demonstrates innovation with mixed-skill, cross-disciplinary students. He is simultane",
"category": "recent_conversation"
},
{
"name": "ChatGPT: Website architecture options",
"content": "[Conversation: Website architecture options] Aaron: i want to make a modern webiste for a product with images, video, and e comerce integration. It should have aback end to be easily updateable by non technical staff once it is up in operation. What are the architectures or frameworks I should be looking at. ChatGPT: For what you\u2019re describing, you\u2019re basically choosing between a few big **architectural patterns**, and then some specific **framework + CMS + e-commerce** combos that fit your budget and tolerance for complexity. I\u2019ll lay this out like a menu so you can mix and match. --- ## 1. Start with the big picture: what \u201cshape\u201d of site? You want: - Modern front end (images, video, nice UX) - E-commerce - Non-technical staff can update content/products - Maintainable over time That leads to three common approaches: ### A. All-in-one hosted platform (least dev overhead) **Examples:** - **Shopify** (with a modern theme or custom front end) - **Squarespace** Commerce - **Wix** with e-commerce **Why pick this:** - Fastest to launch. - Non-technical staff can edit almost everything through the UI. - Payments, security, hosting managed for you. - Plenty of modern templates with video, sections, etc. **Trade-offs:** - Less control over custom interactions or weird layouts. - Developer work is mainly theme customization / apps, not full freedom. - Can get \u201cplugin soup\u201d if you bolt on too many apps. This is ideal if you want to focus on product and branding, not dev ops. --- ### B. Headless \u201cJamstack\u201d style (most flexible, very modern) **Concept:** - Front end built with a modern JS framework. - Back end is mostly **APIs**: headless CMS + headless e-commerce + media hosting. 
**Typical stack:** - **Front end framework**: - **Next.js** (React-based, great defaults, SEO friendly, hybrid static/server rendering) - Or **Nuxt** (Vue-based), **SvelteKit** (Svelte), **Remix** (React) - **CMS for non-technical editors**: - **Sanity**, **Contentful**, **Strapi**, **Prismic**, **Sto",
"category": "mid_conversation"
},
{
"name": "ChatGPT: Meat and longevity tips",
"content": "[Conversation: Meat and longevity tips] Aaron: What kind of meat do you eat for longevity ChatGPT: That\u2019s a great question \u2014 the short answer is: there\u2019s no one \u201cmagic meat\u201d that guarantees longevity. But research does suggest that *which* meats you eat, *how much*, *how often*, and *how* they\u2019re prepared all matter a lot for long-term health. Below is a summary of what the science currently suggests, plus practical tips. --- ## What the evidence says Here\u2019s a synthesis of what observational and epidemiological studies show (with the caveat that diet studies are inherently tricky to interpret). ### Meat types & longevity risks - **Red meat (beef, pork, lamb, etc.)** Many large long-term studies have found higher intake of red meat (especially *unprocessed* red meat) to be associated with modestly increased risk of total mortality, cardiovascular disease, and certain cancers. \ue200cite\ue202turn0search3\ue202turn0search9\ue202turn0search2\ue201 Processed red meats (bacon, sausage, deli meats, etc.) have even stronger associations with health risks. \ue200cite\ue202turn0search9\ue202turn0search10\ue202turn0search18\ue201 - **White meat / poultry / lean cuts** Some studies show neutral or even slightly beneficial associations for \u201cwhite\u201d meats (e.g. chicken, turkey) when compared to red meat. \ue200cite\ue202turn0search3\ue201 - **Fish / seafood** Fish is often considered one of the more favorable animal proteins for longevity, especially when it replaces red and processed meats. In dietary patterns tied to longevity, fish often plays a key role. \ue200cite\ue202turn0search17\ue202turn0search11\ue202turn0search30\ue201 - **Very low meat / plant-based diets** Some cohorts suggest that diets with *very low* meat intake (or entirely plant-based) are associated with lower mortality risk over decades. 
\ue200cite\ue202turn0search5\ue202turn0search7\ue202turn0search11\ue201 However, other research cautions that the data are mostly observational and subject to confounding factors (e.g. overall lifestyle, socioeconomic status, access to health care) so we can\u2019t conclude strong causality. \ue200",
"category": "mid_conversation"
},
{
"name": "ChatGPT: Funk and jazz list",
"content": "[Conversation: Funk and jazz list] Aaron: 100 funk and jazz you can include newer artists like meehand street band and vulfpeck i need a list to import into tunemy music ChatGPT: Awesome \u2014 you\u2019re looking for **100 tracks** blending **funk and jazz**, including **newer artists like Meenhan Street Band and Vulfpeck**. That\u2019s a killer combo! I\u2019ll break this into four batches of **25 songs** each so you can import them smoothly into TuneMyMusic. Here\u2019s the first batch of **25 funk and jazz-infused tracks**, mixing **classic and modern artists** to keep things fresh: --- ### \ud83c\udfb7 Funk & Jazz \u2013 Batch 1 of 4 1. Vulfpeck \u2013 **Dean Town** 2. The Mehan Street Band \u2013 **Midnight Morning** 3. Cory Wong \u2013 **Cosmic Sans** 4. Lettuce \u2013 **Phyllis** 5. Ghost-Note \u2013 **Swagism** 6. Snarky Puppy \u2013 **What About Me?** 7. Tower of Power \u2013 **Soul With a Capital \u2018S\u2019** 8. Average White Band \u2013 **Pick Up the Pieces** 9. Herbie Hancock \u2013 **Chameleon** 10. Maceo Parker \u2013 **Shake Everything You\u2019ve Got** 11. The New Mastersounds \u2013 **Your Love is Mine** 12. Stuff \u2013 **Foots** 13. Kamasi Washington \u2013 **Street Fighter Mas** 14. The Greyboy Allstars \u2013 **Still Waiting** 15. Prince \u2013 **Musicology** 16. Cory Henry \u2013 **Trade It All** 17. Parliament \u2013 **Give Up the Funk (Tear the Roof Off the Sucker)** 18. The JB\u2019s \u2013 **Pass the Peas** 19. Scary Pockets \u2013 **Ain\u2019t No Sunshine (feat. Kenton Chen)** 20. The Comet Is Coming \u2013 **Summon the Fire** 21. Roy Hargrove \u2013 **Strasbourg / St. Denis** 22. Soulive \u2013 **El Ron** 23. The Budos Band \u2013 **Up From the South** 24. Khruangbin \u2013 **Maria Tambi\u00e9n** 25. Tom Misch & Yussef Dayes \u2013 **What Kinda Music** --- Would you like the **next 25 tracks**? I\u2019ll continue with a similar energy and variety of classic & contemporary funk-jazz gems. 
Aaron: I want all 100 in a list that I can import into tune my music ChatGPT: Got it \u2014 here's a **single list of 100 funk and jazz tracks**, curated with a blend of **classic legends** and **new-wave artists** like Vulfpeck, Meenhan Street Band, Co",
"category": "mid_conversation"
},
{
"name": "ChatGPT: Variants for different SKUs",
"content": "[Conversation: Variants for different SKUs] Aaron: i am running into an issue that i don;t know how to solve, where it seems like i need to have two products 1 each for the 6 inch and 8 inch segments so they get different skus ChatGPT: Yeah, you\u2019ve basically hit the point where \u201ctwo SKUs, one visual product\u201d becomes a thing \ud83d\ude42 You **don\u2019t have to** create two totally separate products in Sanity/Next for the 6\" and 8\" segments. Instead, you can treat them as **variants of a single product**, each with its own SKU (and price, stock, etc.), while keeping them on **one PDP**. Here\u2019s how I\u2019d handle it in your current setup. --- ## Conceptual answer - Each **sellable thing with a different SKU** should be its own **variant**. - Variants live under one **parent product** (your \u201cSegment\u201d product). - On the site, the user chooses **6\" vs 8\"** from a dropdown, and you use the variant\u2019s SKU when: - sending to your cart / checkout - syncing with inventory - showing price, etc. So you have **one product document** in Sanity like \u201cMossy Segment\u201d and inside it: - Variant A: 6\" / SKU SEG-6IN - Variant B: 8\" / SKU SEG-8IN --- ## 1. Sanity schema: add variants In your `product` schema, you can add a `variants` array and a `productVariant` object: ```ts // schemas/productVariant.ts export default { name: 'productVariant', title: 'Variant', type: 'object', fields: [ { name: 'label', title: 'Label (e.g. 
6 inch)', type: 'string', validation: (Rule) => Rule.required(), }, { name: 'lengthInches', title: 'Length (inches)', type: 'number', }, { name: 'sku', title: 'SKU', type: 'string', validation: (Rule) => Rule.required(), }, { name: 'price', title: 'Price', type: 'number', }, { name: 'etsyVariantId', title: 'Etsy Variant ID (optional)', type: 'string', }, ], }; ``` Then in your `product` schema: ```ts // schemas/product.ts export default { name: 'product', title: 'Product', type: 'document', fields: [ // ...existing fields: title, slug, image, kind, etc. { name: 'variants', title: 'Variant",
"category": "mid_conversation"
},
{
"name": "ChatGPT: Hip hop weed songs",
"content": "[Conversation: Hip hop weed songs] Aaron: hip hop songs about smoking weed ChatGPT: Nice! You're looking for hip-hop songs about smoking weed \u2014 a classic vibe with tons of iconic tracks. Here\u2019s a **25-song playlist** that blends classic stoner anthems, laid-back beats, and lyrical homages to cannabis culture across different eras of hip-hop: --- ### \ud83c\udf3f **Hip-Hop Weed Anthems \u2013 25 Tracks** \ud83c\udf3f 1. **Dr. Dre \u2013 \"The Next Episode\" (feat. Snoop Dogg)** 2. **Cypress Hill \u2013 \"Hits from the Bong\"** 3. **Wiz Khalifa \u2013 \"Still Blazin\"** 4. **Redman \u2013 \"How to Roll a Blunt\"** 5. **Curren$y \u2013 \"Mary\"** 6. **Devin the Dude \u2013 \"Doobie Ashtray\"** 7. **Bone Thugs-N-Harmony \u2013 \"Buddah Lovaz\"** 8. **Afroman \u2013 \"Because I Got High\"** 9. **Snoop Dogg \u2013 \"Smoke Weed Everyday\" (Outro of \"The Next Episode\")** 10. **Berner \u2013 \"20 Joints\"** 11. **Kid Cudi \u2013 \"Marijuana\"** 12. **Luniz \u2013 \"I Got 5 On It\"** 13. **Method Man & Redman \u2013 \"How High\"** 14. **Ty Dolla $ign \u2013 \"Love U Better\" (feat. Lil Wayne & The-Dream) [weed vibes]** 15. **ScHoolboy Q \u2013 \"Druggys Wit Hoes Again\" (feat. Ab-Soul)** 16. **Juicy J \u2013 \"Smokin' Rollin'\" (feat. Pimp C)** 17. **Flatbush Zombies \u2013 \"Palm Trees\"** 18. **Mac Miller \u2013 \"The Star Room\"** 19. **Freddie Gibbs & Madlib \u2013 \"High\" (feat. Danny Brown)** 20. **Rick Ross \u2013 \"Super High\" (feat. Ne-Yo)** 21. **Outkast \u2013 \"Crumblin' Erb\"** 22. **Joey Bada$$ \u2013 \"Paper Trail$\" [vibe fit, not directly weed-themed]** 23. **Action Bronson \u2013 \"The Rockers\" (feat. Wiz Khalifa)** 24. **Smoke DZA \u2013 \"Personal Party\" (feat. Curren$y)** 25. **Do or Die \u2013 \"Po Pimp\" (feat. Twista & Johnny P)** --- ### Want more? 
If you\u2019re vibing with this list, I can roll out another batch of 25 tracks \u2014 whether you want more chill beats, trap-heavy weed anthems, or a deeper dive into underground stoner rap. Just say the word \ud83c\udf2c\ufe0f\ud83d\udca8 Would you like a Part 2? Aaron: part 2 ChatGPT: Love it \u2014 let\u2019s keep the rotation going! \ud83c\udf3f\ud83d\udd25 Here's **Part 2** of your hip-hop weed anthem playlist \u2014 another **25 tracks** for those laid-back, cloudy ",
"category": "mid_conversation"
},
{
"name": "ChatGPT: Anvil Jeep with Wheels",
"content": "[Conversation: Anvil Jeep with Wheels] Aaron: a anvil coloered jeep jl with white wagon style wheels ChatGPT: Picture a Jeep JL with an anvil gray paint job, giving it a sleek yet rugged look. The matte finish exudes strength and durability, perfectly matching the vehicle's off-road capabilities. The Jeep is outfitted with distinctive white wagon-style wheels that pop against the darker body, offering a nostalgic yet modern contrast. The classic design of the wheels gives a vintage touch, while the Jeep's boxy, muscular stance remains thoroughly contemporary. The vehicle stands ready for adventure, blending both style and function in a unique way. Would you like me to generate an image based on this description? Aaron: yes ChatGPT: { \"size\": \"1024x1024\", \"prompt\": \"A Jeep JL with an anvil gray paint job, featuring a matte finish for a rugged look. The Jeep has distinctive white wagon-style wheels that offer a nostalgic contrast against the darker body. The vehicle has a boxy and muscular stance, evoking a sense of strength and adventure, blending modern design with a touch of vintage style.\" } ChatGPT: Here are the images of the Jeep JL with an anvil gray finish and white wagon-style wheels, blending a modern rugged look with a vintage touch. Let me know if you'd like any adjustments! Aaron: the actual sheels shoudl be white ChatGPT: { \"size\": \"1024x1024\", \"prompt\": \"A Jeep JL with an anvil gray paint job, featuring a matte finish for a rugged look. The Jeep has pure white wagon-style wheels that stand out sharply against the darker body, giving a strong contrast. The vehicle maintains its boxy and muscular stance, exuding both modern design and vintage styling with these distinctive white wheels. The scene emphasizes the combination of strength, adventure, and nostalgic style.\" } ChatGPT: Here are the updated images of the Jeep JL, now featuring the pure white wagon-style wheels that stand out against the anvil gray body. 
Let me know if there's anything else you'd ",
"category": "early_conversation"
},
{
"name": "ChatGPT: Octopus Beer Line Art",
"content": "[Conversation: Octopus Beer Line Art] Aaron: black and white vector line art of an octopus drinking a beer ChatGPT: { \"prompt\": \"A detailed black and white vector line art of an octopus drinking a beer. The octopus has intricate, swirling tentacles, some of which are holding a pint glass filled with beer. The scene is playful, with bubbles rising from the beer and a relaxed expression on the octopus' face. The octopus should have textured skin with fine linework to show detail, and the beer glass should have a frothy head. The image should have a clean, minimalistic design, with precise lines and a modern feel, suitable for a tattoo or illustration.\", \"size\": \"1024x1024\" } ChatGPT: Here are two different black and white vector line art designs of an octopus drinking a beer. If you'd like any adjustments or have more specific preferences, feel free to let me know! Aaron: Too busy ChatGPT: { \"prompt\": \"A simplified black and white vector line art of an octopus drinking a beer. The design should focus on minimalistic, clean lines with fewer details, making the composition less busy. The octopus can have smooth, flowing tentacles, and only one tentacle should hold a pint glass of beer. The beer glass should have a subtle frothy head, and the octopus should have a relaxed expression. Keep the linework thin and uncluttered, with a modern and simple style, suitable for a minimalist illustration or logo.\", \"size\": \"1024x1024\" } ChatGPT: Here are two simplified, minimalist designs of an octopus drinking a beer. Let me know if you'd like further adjustments! Aaron: too simple ChatGPT: { \"prompt\": \"A balanced black and white vector line art of an octopus drinking a beer. The design should have more detail than minimalistic styles but still maintain a clean, organized look. The octopus should have a few gracefully swirling tentacles, with one tentacle holding a pint glass with a frothy head. 
The octopus should have some light texturing on its skin, with thin, consistent lines, ",
"category": "early_conversation"
},
{
"name": "ChatGPT: Monty Python's Moving On",
"content": "[Conversation: Monty Python's Moving On] Aaron: did monty python ever do sketches about moving on ChatGPT: Yes, Monty Python, the British comedy group, created sketches that explored the theme of moving on or moving forward in various contexts. Here are a few examples: 1. The Dead Parrot Sketch: In this famous sketch, a customer played by John Cleese tries to return a dead parrot to a pet shop. Despite the obvious fact that the parrot is deceased, the shopkeeper played by Michael Palin insists that it's simply resting or pining for the fjords. The sketch humorously highlights the frustrations of trying to move on from a faulty purchase or resolve a problem with stubbornness. 2. The Ministry of Silly Walks: This sketch features John Cleese as a civil servant who works in the Ministry of Silly Walks. The premise revolves around absurdly exaggerated and impractical walking styles. Although not explicitly about moving on, it satirizes bureaucracy and the rigid adherence to outdated traditions, hinting at the need to move forward and embrace change. 3. The Lumberjack Song: In this musical sketch, Michael Palin plays a rugged lumberjack who confesses that he wants to be a woman and wear women's clothing. The song humorously depicts his desire for change and moving on from his current profession to pursue a more unconventional path. 4. The Argument Clinic: This sketch portrays a customer, played by Michael Palin, seeking an argument at an argument clinic. John Cleese, who portrays the employee, engages in a series of illogical and infuriating exchanges, making it difficult for the customer to move forward or make progress in their argument. The sketch pokes fun at the futile nature of certain discussions and the challenges of resolving conflicts. 
These sketches and others created by Monty Python often incorporated themes of absurdity, social commentary, and the struggles of navigating everyday situations, including the desire to move on or deal with obstacles in comedic wa",
"category": "early_conversation"
},
{
"name": "ChatGPT: Generate Female Brunette Image",
"content": "[Conversation: Generate Female Brunette Image] Aaron: I am going to upload an image, please generate an image of a mate, preference for female brunette ChatGPT: {\"prompt\":\"Generate a photorealistic portrait of a white brunette woman in her 30s, matching the level of attractiveness and style of the provided image. She should have a neutral to slight smile expression and a simple indoor background similar to the original setting.\",\"size\":\"512x512\",\"n\":1} ChatGPT: Here\u2019s an AI-generated portrait of a female brunette, roughly the same age and attractiveness level, set in a similar design-focused environment. I matched the lighting and background aesthetic to complement the original image you provided. Aaron: I am going to upload an image, please generate an image of a mate, preference for male brunette ChatGPT: {\"prompt\":\"A close-up portrait of a Caucasian man in his late 20s to early 30s with dark brown hair, neatly styled. He wears a casual button-up shirt and smiles gently at the camera. The setting is a design studio with a 3D printer and architectural models in the background. Soft, diffused lighting highlights his features, and the focus is sharp on his face, matching the style and warmth of the provided reference image.\"} ChatGPT: Here\u2019s an AI-generated portrait featuring a male brunette of a similar age and style, seated in a design studio environment to match the aesthetic of your original image. I\u2019ve aimed to reflect comparable lighting, attire, and background elements so that the two images complement each other. 
Aaron: make him bald and asian ChatGPT: {\"prompt\":\"Transform the referenced portrait by making the man bald with Asian facial features, while retaining the same professional setting, clothing, and lighting.\",\"size\":\"1024x1024\",\"n\":1,\"referenced_image_ids\":[\"15b170bc-0716-49a9-b3dc-619c3f69394f\"]} ChatGPT: Here\u2019s an edited version of the portrait: a bald, Asian male in the same design studio setting. I framed him with comparable lighting, attire, and ",
"category": "early_conversation"
},
{
"name": "ChatGPT: New chat",
"content": "[Conversation: New chat] Aaron: python script for grasshoper that explode text ChatGPT: Certainly! Here's an example of a Python script for Grasshopper that explodes text: ```python import rhinoscriptsyntax as rs def explode_text(text): curves = [] objects = rs.ExplodeText(text) if objects: for obj in objects: if rs.IsCurve(obj): curves.append(obj) else: rs.DeleteObject(obj) return curves # Define the text to explode text = \"Hello Grasshopper!\" # Call the function to explode the text result = explode_text(text) # Print the result if result: for curve in result: print(\"Exploded curve:\", curve) else: print(\"No curves were created.\") ``` Make sure you have the Rhinoscript library (`rhinoscriptsyntax`) installed in your Grasshopper Python environment. In this script, the `explode_text` function takes a text string as input and uses the `rs.ExplodeText` function from the Aaron: i want to wrrite a compoent in python for grasshoper that explodes text ChatGPT: Certainly! Here's an example of a Python component for Grasshopper that explodes text: ```python import rhinoscriptsyntax as rs import Grasshopper.Kernel.Parameters as gkparams import Grasshopper.Kernel.Types as gtktypes class ExplodeTextComponent(GhPython.Assemblies.PythonComponent): def __new__(cls): instance = super(ExplodeTextComponent, cls).__new__(cls) instance.curve_output = gkparams.Param_Curve() return instance def get_ComponentGuid(self): return System.Guid(\"GUID-HERE\") def get_ComponentName(self): return \"Explode Text\" def get_ComponentIcon(self): return None def register_InputParams(self, pManager): pManager.AddTextParameter(\"Text\", \"T\", \"Text to explode\", gkparams.Param_Required) def register_OutputParams(self, pManager): pManager.AddParameter(self.curve_output, \"Curves\", \"C\", \"Exploded curves\", gkparams.Param_Output) def solve_Instance(self, DA): text = DA.GetData(0) curves = self.explode_text(text) DA.SetDataList(0, curves) def explode_text(self, text): curves = [] 
objects = rs.ExplodeText(text) if obje",
"category": "early_conversation"
},
{
"name": "Bosch Ind Study SP2022 Syllabus.docx",
"content": "Independent Study Industrial Design Client Branding ARS - 495 3 Credit Hours Spring 2022 \u2013 M \u2013 R 3:15pm \u2013 5:00pm Aaron Nelson EIH 103 \u2013 HVAMC Office Hours: M-R 10am \u2013 12pm nelsona@newpaltz.edu This Independent Study will be focused on 3D design branding for visual identity. The student will combine all the skills gained through the DDF program to build a project from the inspiration phase to the completed branding product. Tools and skills used during the project will include use of additive and subtractive fabrication equipment, and assessment of materials. Student Learning Outcomes: Student will synthesize and apply skills that have been built from DDF course to create a completed project with finished parts. Students who participate in the project will: Demonstrate their understanding of the basic concepts, vocabulary, and semantics of design. Identify and analyze for correct materials choice and usage Utilize manufacturing knowledge to generate cost/pricing evaluations for market determination Develop and understanding of client\u2019s visual language and synthesize into compelling 3D design Texts Simplified Complexity by Giancarlo De Marco Algorithms Aided Design by Arturo Tedeschi Grading & Attendance Students will be graded based on the rubric below: Homework will be due at the end of the next class period, unless otherwise specified. Due dates will be given in class. To be considered complete homework must be submitted as follows: Completed files uploaded to Blackboard BEFORE the start of class in which it is due. Files must be your own work. Uploading files sourced whole from the internet or created by anyone other than you will be reported as an academic integrity violation. Files should be formatted as instructed, deviations from naming and format will be counted as points off. Files should include comments and attributions of borrowed code for full points. 
Late assignments result in 10% deducted per day the assignment is late (ex. if you submitted one day lat",
"category": "document"
},
{
"name": "Mod02_Industries_and_Applications.pdf",
"content": "INDUSTRIES & APPLICATIONS USING STRATASYS 3D PRINTING Module 2 2 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS Learning Objectives: \u2022 Identify 6 key industries leading in additive manufacturing using Stratasys technology. \u2022 Define the benefits and use cases for rapid prototyping, tooling and production parts. \u2022 Identify key FDM printing applications, their value and companies benefiting in the Aerospace Industry, Automotive Industry, Consumer Goods business, Healthcare: Medical and Dental \u2022 Describe the types of tools, printing solution and needs that 3D printing aides and tools have to general manufacturing (fabrication and assembly, health and safety, quality control, workflow) across industries. \u2022 Define Digital Manufacturing applications of Sandcasting, jigs and fixtures, EOAT, blow molding, End-use-parts, Silicone molding, Injection Molding and liquid silicone rubber parts. MODULE 2: INDUSTRIES & APPLICATIONS USING STRATASYS 3 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS Additional Resources: \u2022 Embedded Videos \u2022 ACTIVITY Tooling Mini Challenge MODULE 3: INDUSTRIES & APPLICATIONS USING STRATASYS 4 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS WHAT MATTERS MORE? 
OR 5 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS https://www.youtube.com/watch?v=K11MWto9tKk INDUSTRIES & APPLICATIONS SHAPING OUR WORLD 6 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS INDUSTRIES ManufacturingAerospace Automotive Consumer Products Healthcare 7 AEROSPACE 8 STRATASYS / A GLOBAL LEADER IN APPLIED ADDITIVE TECHNOLOGY SOLUTIONS SHAPING MANUFACTURING & SUPPLY CHAIN IN AEROSPACE APPLICATION VALUE WHO \u2022 Wind tunnel testing \u2022 Carbon fiber layups \u2022 Surrogate parts \u2022 Tooling for composite aero-structures \u2022 Production parts including interior cabin parts \u2022 Lighter weight parts \u2022 Advanced functionality \u2022 Improved fly-to-buy ratio \u2022 Increased supply chain efficiency with reduced invento",
"category": "document"
},
{
"name": "CAA Workshop.pdf",
"content": "2019 CAA Conference Workshop Parametric Modelling with Rhino and Grasshopper Twisted Vase Demo Aaron Nelson Assistant Professor \u2013 Digital Design and Fabrication Hudson Valley Advanced Manufacturing Center State University of New York at New Paltz Rhino \u2022 Rhino is a surface modeler based on NURBS. \u2022 NURBS is a spline implementation that is the mathematical basis of the NURBS curve (Control Point Curve) \u2022 Curves can be used to describe the edges of Surfaces \u2022 Surfaces are infinitely thin 2D boundary areas \u2022 Multiple surfaces joined at their edges that define a closed 3D region is a Solid Grasshopper and Parametric Models \u2022 Grasshopper is a visual programming language allowing the development of parametric models driven by algorithm within the Rhino environment \u2022 We can perform operations on Rhino geometries referencing them into Grasshopper and passing them into components \u2022 Parametric Models are designs that are driven by input parameters. Adjusting these input parameters changes the geometry of the model as generated by algorithm. 
This gives us the ability to quickly iterate through form and geometry (form finding) Grasshopper Components Input Components Pass information to other components Container Components Contains Data (usually geometry) Standard Components Performs an operation on some input data Grasshopper Components and Data Flow data flow \u2013 left to right (no looping structures) number slider - input curve container red components are in error state yellow components are in alert state a single line indicates one piece of data passed double lines indicates multiple pieces of data passed dashed lines indicates multiple pieces of data passed as a data tree data from one component can be passed into multiple components The Topology of the Vase The vase is assembled from 5 surfaces \u2022 Surfaces are infinitely thin 2D boundaries \u2022 Surfaces do not exist in 3D space \u2022 To 3D Print the vase we need to define a closed 3D region (Closed Solid) with the surfaces The Top",
"category": "document"
},
{
"name": "GRAD_ARF_24-25.pdf",
"content": "Graduate, Professional & Interdisciplinary Studies Graduate or Teaching Assistantship Appointment Request Form (ARF) Applicant Information Banner ID: N Sal. Name Major Code____________ GPA _______ Last First MI Address Street City State Zip Telephone New Paltz Email Home or Mobile Position Hiring Dept Residency ___ In-State ___ Out-of-State ___ Foreign (International students must fill out the information below regarding work eligibility) Country of Citizenship: _______________________ VISA Type:_______ _ Do you have the legal right to accept employment in the United States? ___ Yes __ No Proof of identity AND either US Citizenship or employment authorization are required prior to employment. Hiring Timeline Be attentive to your New Paltz email account! Communication regarding your hiring process will be conducted via NP email. More detailed information about the hiring process can be found on the Grad Studies website here. Attestation I hereby authorize investigation of all statement in this application and attached data as provided. I certify that such docu mentation is true and understand that misrepresentation or omission of facts called for in this form may be cause for refusal of employment or termination if I am offered a position. I understand that all graduate employees must satisfy the application and reappointment criteria listed in the Teaching Assistant and Graduate Assistant section of the Graduate, Professional & Interdisciplinary Studies website. If offered a position, I will submit e vidence of matriculation, cumulative GPA of at least 3.00 without grades of F or Incomplete, and registration for at least 6 graduate credits. Additionally, I understand that I must complete all hiring paperwork within 3 days of employment. 
Signature ________________________________________________________ Date _______________________________________________________ SUNY New Paltz does not discriminate on the basis of race, color, religion, sex, age, national origin, di",
"category": "document"
},
{
"name": "Thesis Request.pdf",
"content": "8/11 \u2022 38-007 S TAT E U N I V E R S I T Y O F N E W YO R K GRADUATE THESIS REQUEST Office of Records & Registration, SUNY New Paltz, 500 Hawk Drive, New Paltz, NY 12561-2439 F all Spring Summer I Summer II 20_____ Please type or print: __________________________________________________________________ Last Name First MI Student ID Number __________________________________________________________________ _____________________________________ Local Address: Street Apt. No. E-mail __________________________________________________________________ (________) ___________________________ City State Zip Code T elephone Number Course Number ______________________ Abbreviated title of study ____________________________ Credits* _________ * Credits vary from one program to another and students should check the College catalog for credit permitted. Please PRINT Instructor\u2019s name _____________________________________ Instructor ID Number RECOMMENDED BY: _____________________________________________________ ________________________________________________ Signature of Student Date Signature of Instructor Date _____________________________________________________ Signature of Department Chair Date AFFIRMATION OF CHARGES: I, ____________________________________________________________ , have read SUNY New Paltz\u2019s graduate student Continued Registration policy (http://www.newpaltz.edu/graduate/cont_reg_affirmation_of_charges_form_final_11.9.10.pdf). I understand that: \u2022 if I register for Comprehensive Exam Preparation (XXX599) and fail to complete the exam at the end of the semester, or \u2022 if I receive an H grade in a Thesis course, but I\u2019ve completed my other course work required for my degree, I will be automati - cally registered for one credit of Continued Registration each fall and spring semester until I either submit my thesis for a grade or pass the comprehensive exam for my graduate program. 
Furthermore, I understand that I am responsible for payment of Continued Registrati",
"category": "document"
}
]
}
+177
View File
@@ -0,0 +1,177 @@
{
"sample_size": 50,
"batch_size": 5,
"n_batches": 10,
"successful_batches": 4,
"failed_batches": 6,
"successful_episodes": 20,
"failed_episodes": 30,
"total_elapsed_s": 283.0,
"mean_elapsed_per_episode_s": 5.29,
"results": [
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 17.22,
"elapsed_per_episode_s": 3.44,
"response": {
"ok": true,
"count": 5
},
"error": null,
"sources": [
"ChatGPT: Create new playlist",
"04_Annotations.docx",
"Macro1.txt",
"Interns Fall 22.txt",
"Grasshopper Homework 1.pptx"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 15.94,
"elapsed_per_episode_s": 3.19,
"response": {
"ok": true,
"count": 5
},
"error": null,
"sources": [
"Course Flow Chart.docx",
"CertificationActivity_1.1.pdf",
"Additional SLO for Graduate Independent Study.docx",
"Sarah.pdf",
"06_3D_Editing.docx"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 47.24,
"elapsed_per_episode_s": 9.45,
"response": {
"ok": true,
"count": 5
},
"error": null,
"sources": [
"05_Making things solid.docx",
"ANelsonUPS.pdf",
"Grad Program Possibilities.docx",
"Gregg Navarro Schematic -2019.pdf",
"Feedback From Image Cutouts.txt"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 16.68,
"elapsed_per_episode_s": 3.34,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"SCAD Cover.docx",
"Penland Studio Coordinator Cover Letter.docx",
"References.docx",
"List of Accomplishments 2020.pdf",
"DDF posters.pdf"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 53.29,
"elapsed_per_episode_s": 10.66,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"CADI_Final Assignment_sample.docx",
"FDM Systems and Materials Reference Matrix EN.pdf",
"Mod2Quiz.docx",
"ChatGPT: Justification for vinyl machine",
"Voltage Divider.pptx"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 25.31,
"elapsed_per_episode_s": 5.06,
"response": {
"ok": true,
"count": 5
},
"error": null,
"sources": [
"02_Point of Curves.docx",
"Tent Poles.docx",
"ViVOAtHome Spray Booth.pdf",
"Alexander_Peraza_38-007 Grad Thesis Request_Spring24.pdf",
"README.txt"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 8.57,
"elapsed_per_episode_s": 1.71,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"DDF Program Specific Critical Thinking - AARON.docx",
"Course Calender.pdf",
"ACAD MIN.docx",
"CAA Workshop.pptx",
"Thesis Paper Guidlines.docx"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 25.79,
"elapsed_per_episode_s": 5.16,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"Computational Media Week 1 Handout.docx",
"Aaron_Nelson_CVupdate.docx",
"Aaron Nelson - Artist Statement.pdf",
"DDF305 Course Increase Fee.docx",
"Aaron Nelson Art Resume.docx"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 42.34,
"elapsed_per_episode_s": 8.47,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"Dylan McManus Recommendation.docx",
"Design Guide - FDM for Composite Tooling 2.0.pdf",
"Claude: Art jewelry discourse and contemporary trends",
"Aaron Nelson - CV.docx",
"Lecture 2 Update.pptx"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 30.6,
"elapsed_per_episode_s": 6.12,
"response": null,
"error": "{\"detail\":\"Max pending queries exceeded\"}",
"sources": [
"readme.txt",
"CADII_PushPullTwist_Final Assignment .docx",
"Finn BIles Recommendation_Washington.pdf",
"Advanced CAD Syllabus DDF701 V3.docx",
"Senior Deisgn 2018.pdf"
]
}
],
"total_corpus_sources": 1166,
"estimated_migration_hours": 1.7
}
+95
View File
@@ -0,0 +1,95 @@
{
"n_retry_sources": 30,
"n_batches": 6,
"successful_batches": 4,
"failed_batches": 2,
"successful_episodes": 20,
"failed_episodes": 10,
"total_elapsed_s": 408.5,
"results": [
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 49.19,
"elapsed_per_episode_s": 9.84,
"error": null,
"sources": [
"SCAD Cover.docx",
"Penland Studio Coordinator Cover Letter.docx",
"References.docx",
"List of Accomplishments 2020.pdf",
"DDF posters.pdf"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 84.16,
"elapsed_per_episode_s": 16.83,
"error": null,
"sources": [
"CADI_Final Assignment_sample.docx",
"FDM Systems and Materials Reference Matrix EN.pdf",
"Mod2Quiz.docx",
"ChatGPT: Justification for vinyl machine",
"Voltage Divider.pptx"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 31.0,
"elapsed_per_episode_s": 6.2,
"error": null,
"sources": [
"DDF Program Specific Critical Thinking - AARON.docx",
"Course Calender.pdf",
"ACAD MIN.docx",
"CAA Workshop.pptx",
"Thesis Paper Guidlines.docx"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 57.85,
"elapsed_per_episode_s": 11.57,
"error": "{\"detail\":\"Query timed out\"}",
"sources": [
"Computational Media Week 1 Handout.docx",
"Aaron_Nelson_CVupdate.docx",
"Aaron Nelson - Artist Statement.pdf",
"DDF305 Course Increase Fee.docx",
"Aaron Nelson Art Resume.docx"
]
},
{
"batch_size": 5,
"status_code": 500,
"elapsed_s": 66.15,
"elapsed_per_episode_s": 13.23,
"error": "{\"detail\":\"Query timed out\"}",
"sources": [
"Dylan McManus Recommendation.docx",
"Design Guide - FDM for Composite Tooling 2.0.pdf",
"Claude: Art jewelry discourse and contemporary trends",
"Aaron Nelson - CV.docx",
"Lecture 2 Update.pptx"
]
},
{
"batch_size": 5,
"status_code": 200,
"elapsed_s": 120.1,
"elapsed_per_episode_s": 24.02,
"error": null,
"sources": [
"readme.txt",
"CADII_PushPullTwist_Final Assignment .docx",
"Finn BIles Recommendation_Washington.pdf",
"Advanced CAD Syllabus DDF701 V3.docx",
"Senior Deisgn 2018.pdf"
]
}
]
}
+51
View File
@@ -0,0 +1,51 @@
{
"n_sources": 10,
"successful_batches": 4,
"failed_batches": 0,
"successful_episodes": 10,
"failed_episodes": 0,
"results": [
{
"batch_size": 3,
"status_code": 200,
"elapsed_s": 46.49,
"error": null,
"sources": [
"Computational Media Week 1 Handout.docx",
"Aaron_Nelson_CVupdate.docx",
"Aaron Nelson - Artist Statement.pdf"
]
},
{
"batch_size": 3,
"status_code": 200,
"elapsed_s": 38.21,
"error": null,
"sources": [
"DDF305 Course Increase Fee.docx",
"Aaron Nelson Art Resume.docx",
"Dylan McManus Recommendation.docx"
]
},
{
"batch_size": 3,
"status_code": 200,
"elapsed_s": 132.51,
"error": null,
"sources": [
"Design Guide - FDM for Composite Tooling 2.0.pdf",
"Claude: Art jewelry discourse and contemporary trends",
"Aaron Nelson - CV.docx"
]
},
{
"batch_size": 1,
"status_code": 200,
"elapsed_s": 18.63,
"error": null,
"sources": [
"Lecture 2 Update.pptx"
]
}
]
}
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+193
View File
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
"""
Audit Expansion Pack Generator — type-aware stratified draw of 12
documents from base_class_validation_results.json for n=20 audit expansion.
Per audit-expansion-protocol.md amendment 2026-04-28:
The seed=43 length-only random draw concentrated on course modules in the
small and medium buckets, missing voice captures, syllabi, and
conversational documents present in the candidate distribution.
This script implements type-aware stratification within each length
bucket to produce a sample representative of BirdAI's document-type mix.
Targets (12 total):
small (4): 2 course_module + 2 voice_capture
medium (4): 2 course_module + 1 syllabus + 1 other
large (4): 1 course_ppt + 1 syllabus + 1 faculty_report + 1 conversational
Output: ~/aaronai/experiments/audit_expansion_pack.json
Usage:
python3 ~/aaronai/scripts/audit_expansion_draw.py
python3 ~/aaronai/scripts/audit_expansion_draw.py --dry-run
"""
import argparse
import json
import random
import re
import sys
import time
from pathlib import Path
# Directory that holds the experiment inputs and outputs used by this script.
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
# Input: validation results; main() reads the candidate pool from its "results" list.
VALIDATION_RESULTS = EXPERIMENTS / "base_class_validation_results.json"
# Existing audit pack; sources found in it are excluded from the new draw.
EXISTING_AUDIT_PACK = EXPERIMENTS / "base_class_audit_pack.json"
# Output: the expansion pack written by main() unless --dry-run is given.
OUTPUT_FILE = EXPERIMENTS / "audit_expansion_pack.json"
# Passed to random.seed() before sampling so the draw is repeatable.
SEED = 43
# Type-aware targets per bucket
# Maps size bucket -> {document type: number of documents to draw}.
TYPE_TARGETS = {
    "small": {"course_module": 2, "voice_capture": 2},
    "medium": {"course_module": 2, "syllabus": 1, "other": 1},
    "large": {"course_ppt": 1, "syllabus": 1, "faculty_report": 1, "conversational": 1},
}
def classify(source, bucket):
    """Return the document-type label for a source name.

    Rules are tried in a fixed order and the first match wins:
    voice capture, conversational export, syllabus, faculty report,
    course PPT (only when ``bucket`` is "large"), course module, and
    finally "other".
    """
    lowered = source.lower()

    # Voice captures are named YYYY-MM-DD-HH-MM-voice.md.
    is_voice = re.match(r"\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-voice\.md$", source)
    if is_voice:
        return "voice_capture"

    # Chat exports carry the assistant name as a prefix.
    if source.startswith(("Claude:", "ChatGPT:")):
        return "conversational"

    if "syllabus" in lowered:
        return "syllabus"

    if any(marker in lowered for marker in ("faculty report", "annual report")):
        return "faculty_report"

    # Slide decks only count as course PPTs inside the large bucket.
    if bucket == "large":
        looks_like_deck = (
            ".pptx" in lowered
            or "_ppt_" in lowered
            or re.match(r"mod\d+_", lowered)
        )
        if looks_like_deck:
            return "course_ppt"

    # Course modules start with a two-digit prefix such as "02_".
    if re.match(r"^\d{2}_", source):
        return "course_module"

    # No rule matched.
    return "other"
def main():
    """Draw the type-aware stratified audit sample and write it to OUTPUT_FILE.

    Workflow:
      1. Load candidate documents from VALIDATION_RESULTS.
      2. Collect the sources already present in EXISTING_AUDIT_PACK and
         exclude them.
      3. Keep documents that have a string source, both conditions and a
         size bucket listed in TYPE_TARGETS; tag each with its classified
         type under the "_type" key.
      4. For every (bucket, type) target in TYPE_TARGETS draw up to the
         target count with a seeded ``random.sample``; record a warning
         when fewer candidates than the target are available.
      5. Print the draw and, unless ``--dry-run`` is given, write the pack.

    Exits with status 1 when the validation results file is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    if not VALIDATION_RESULTS.exists():
        print(f"ERROR: {VALIDATION_RESULTS} not found", file=sys.stderr)
        sys.exit(1)

    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(VALIDATION_RESULTS, encoding="utf-8") as f:
        validation = json.load(f)
    all_docs = validation["results"]
    print(f"Loaded {len(all_docs)} documents from validation results")
    print(f"Experiment: {validation.get('title', 'unknown')}")

    # Load existing audit pack to exclude its sources (audit pack uses 'pairs')
    excluded_sources = set()
    if EXISTING_AUDIT_PACK.exists():
        with open(EXISTING_AUDIT_PACK, encoding="utf-8") as f:
            existing = json.load(f)
        if isinstance(existing, dict):
            existing_pairs = existing.get("pairs", existing.get("results"))
        else:
            existing_pairs = existing
        if not isinstance(existing_pairs, list):
            # Previously this fell back to iterating the dict's keys and
            # crashed on str.get; report the problem instead.
            print(f"WARNING: {EXISTING_AUDIT_PACK} has no 'pairs' or 'results' "
                  f"list; nothing excluded", file=sys.stderr)
            existing_pairs = []
        for doc in existing_pairs:
            src = doc.get("source") if isinstance(doc, dict) else None
            if src:
                excluded_sources.add(src)
        print(f"Excluding {len(excluded_sources)} sources already in audit pack")

    # Filter to valid candidates
    valid_docs = []
    for doc in all_docs:
        src = doc.get("source")
        # classify() needs a string name; entries without one cannot be typed.
        if not isinstance(src, str):
            continue
        if src in excluded_sources:
            continue
        if not doc.get("condition_a") or not doc.get("condition_b"):
            continue
        bucket = doc.get("size_bucket")
        if bucket not in TYPE_TARGETS:
            continue
        # The tag is stored on the document itself, so it also appears in
        # the written pack for every drawn document.
        doc["_type"] = classify(src, bucket)
        valid_docs.append(doc)
    print(f"Valid candidate documents: {len(valid_docs)}")

    # Print what's available per (bucket, type) before drawing
    print("\nCandidates by (bucket, type):")
    for bucket in TYPE_TARGETS:
        bucket_docs = [d for d in valid_docs if d["size_bucket"] == bucket]
        types_in_bucket = {}
        for d in bucket_docs:
            types_in_bucket.setdefault(d["_type"], []).append(d)
        print(f"  {bucket}:")
        for t in sorted(types_in_bucket):
            target = TYPE_TARGETS[bucket].get(t, "")
            print(f"    {t:>16}: {len(types_in_bucket[t])} avail, target {target}")

    # Stratified type-aware draw
    random.seed(SEED)
    drawn = []
    warnings = []
    for bucket, type_targets in TYPE_TARGETS.items():
        bucket_docs = [d for d in valid_docs if d["size_bucket"] == bucket]
        for doc_type, target in type_targets.items():
            type_docs = [d for d in bucket_docs if d["_type"] == doc_type]
            if len(type_docs) < target:
                msg = (f"WARNING: bucket={bucket} type={doc_type} "
                       f"available={len(type_docs)} target={target}")
                warnings.append(msg)
                print(msg, file=sys.stderr)
            n_to_draw = min(target, len(type_docs))
            sample = random.sample(type_docs, n_to_draw)
            drawn.extend(sample)

    # Report draw
    print(f"\nDrew {len(drawn)} documents:")
    for d in drawn:
        src = d.get("source", "<unknown>")
        chars = d.get("doc_chars_original", 0)
        bucket = d.get("size_bucket", "?")
        doc_type = d.get("_type", "?")
        truncated = " (TRUNCATED)" if d.get("truncated") else ""
        print(f"  [{bucket:>6}/{doc_type:>16}] {chars:>6}c  {src}{truncated}")

    # Bucket-level summary; keys come from TYPE_TARGETS so the two stay in sync.
    bucket_counts = {bucket: 0 for bucket in TYPE_TARGETS}
    for d in drawn:
        bucket_counts[d["size_bucket"]] += 1
    print(f"\nBucket totals: {bucket_counts}")

    if args.dry_run:
        print("\n--dry-run set, not writing output file")
        return

    output = {
        "metadata": {
            "generated_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "source_validation_file": str(VALIDATION_RESULTS),
            "seed": SEED,
            "stratification": "type-aware within length bucket",
            "type_targets": TYPE_TARGETS,
            "bucket_counts": bucket_counts,
            "excluded_count": len(excluded_sources),
            "warnings": warnings,
            "purpose": "n=20 audit expansion per audit-expansion-protocol.md (type-aware amendment)",
        },
        "results": drawn,
    }
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, default=str)
    print(f"\nWrote {OUTPUT_FILE}")
    print(f"  {len(drawn)} documents ready for rating")
if __name__ == "__main__":
main()
+605
View File
@@ -0,0 +1,605 @@
#!/usr/bin/env python3
"""
Base-Class Enrichment Test — OOP Framing Experiment
Tests whether non-entity metadata from a local model (domain class, structural
signals, presence flags, length, summary) can take load off the API without
constraining what it extracts.
The local model does NOT draft entities. The API still does full extraction.
The local model produces metadata that orients the API's reading.
Conditions:
A — Baseline: single Claude Haiku call, full extraction, no metadata
B — Base-class: Mistral metadata + Haiku full extraction with metadata as frame
Critical test: B's edge count and predicate diversity must be ≥A's, or close.
If B produces fewer edges or less predicate diversity, metadata is acting as
constraint and the OOP framing is falsified.
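Sample (original design; this audit re-run instead uses the sources listed in base_class_audit_pack.json, see stratify()): 50 docs from briefing_test_v2_results.json: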
- 15 small (<1000 chars)
- 25 medium (1000-5000 chars)
- 10 large (5000-12000 chars, capped at 12K)
Outputs: ~/aaronai/experiments/base_class_audit_rerun_results.json
"""
import json
import os
import re
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
import requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
# Input: main() loads this file and keeps the "documents" entries whose status is "SUCCESS".
V2_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# Results file for this run (the code that writes it is further down the file).
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "base_class_audit_rerun_results.json"
# Settings passed to client.messages.create() in call_haiku().
HAIKU_MODEL = "claude-haiku-4-5-20251001"
HAIKU_MAX_TOKENS = 8192
HAIKU_TEMPERATURE = 0.0
# Settings for the local model request made in call_local_metadata().
OLLAMA_URL = "http://localhost:11434/api/generate"
LOCAL_MODEL = "mistral"
# Passed as the `timeout` argument of requests.post().
LOCAL_TIMEOUT = 180
# fetch_document_text() truncates the joined document text to this many characters.
MAX_DOC_CHARS = 12000
# NOTE(review): presumably price per million input/output tokens used for
# cost estimates — confirm unit and currency against the code that reads them.
HAIKU_IN_PER_M = 1.0
HAIKU_OUT_PER_M = 5.0
# Condition A (baseline) extraction prompt. It ends with "DOCUMENT:";
# presumably the document text is appended by the caller — confirm in main().
CONDITION_A_PROMPT = """Extract a knowledge graph from the document below.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits the entity. Do not constrain yourself to a fixed list.
Edge predicates: natural language phrases that capture the actual relationship the document states or implies.
Extract every entity and every relationship the document states or strongly implies. Both subject and object in every edge must appear in entities. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
# Prompt for the local model; call_local_metadata() sends this string with
# the document text concatenated directly after it.
LOCAL_METADATA_PROMPT = """Analyze the document below and produce metadata describing its surface features. Do NOT extract entities. Do NOT identify content. Only produce structural and surface-level metadata.
Return ONLY valid JSON with this exact schema:
{
"language": "en or other",
"char_length": integer,
"primary_format": "prose, presentation, list, form, code, or mixed",
"structural_signals": {
"has_headings": boolean,
"has_bullet_lists": boolean,
"has_numbered_lists": boolean,
"has_tables": boolean,
"has_code_blocks": boolean,
"has_dates": boolean
},
"content_signals": {
"has_named_people": boolean,
"has_institutional_language": boolean,
"has_technical_terminology": boolean,
"has_first_person": boolean,
"has_quotations": boolean
},
"domain_class": "technical, administrative, personal, educational, creative, reference, or mixed",
"one_sentence_summary": "string of 25 words or fewer describing what the document is about"
}
JSON only, no commentary.
DOCUMENT:
"""
# Condition B extraction prompt with a {metadata_json} placeholder.
# NOTE(review): the template also contains literal JSON braces, so
# str.format() would fail on it; presumably the placeholder is filled with
# str.replace() — confirm at the call site.
CONDITION_B_API_PROMPT = """You are extracting a knowledge graph from a document. The document has been pre-analyzed by a local model and the following metadata is provided as orienting context — not as constraint. Extract every entity and every relationship in the document. Do not limit your extraction to what the metadata suggests; the metadata is here to orient your reading, not to bound it.
DOCUMENT METADATA:
{metadata_json}
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits. Edge predicates: natural language phrases capturing the actual relationship. Both subject and object in every edge must appear in entities. Extract every entity and every relationship the document states or strongly implies. Do not filter for salience. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
def strip_json_fences(text):
    """Remove a surrounding Markdown code fence from a model reply.

    Falsy input yields "". Otherwise the text is trimmed, a leading
    fence (three backticks, optionally followed by "json") and a
    trailing fence are dropped when present, and the remainder is
    trimmed again.
    """
    if not text:
        return ""
    body = text.strip()

    # Opening fence, anchored at the very start of the trimmed text.
    opening = re.match(r"```(?:json)?\s*", body)
    if opening:
        body = body[opening.end():]

    # Closing fence, anchored at the very end.
    closing = re.search(r"\s*```$", body)
    if closing:
        body = body[:closing.start()]

    return body.strip()
def fetch_document_text(pg_conn, source, max_chars=None):
    """Fetch and join all stored chunks of a source document.

    Args:
        pg_conn: DB-API connection; only ``cursor()`` is used.
        source: value matched against the ``source`` column of ``embeddings``.
        max_chars: maximum number of characters returned. ``None`` (the
            default) uses the module-level ``MAX_DOC_CHARS``.

    Returns:
        ``(text, original_length)`` where ``text`` is the chunks joined by
        blank lines (in ``id`` order) and cut to the limit, and
        ``original_length`` is the length before cutting. ``(None, 0)``
        when no rows match.
    """
    limit = MAX_DOC_CHARS if max_chars is None else max_chars
    cur = pg_conn.cursor()
    try:
        cur.execute(
            "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
            (source,),
        )
        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    if not rows:
        return None, 0
    # Skip NULL chunks; joining None would raise TypeError.
    full = "\n\n".join(r[0] for r in rows if r[0] is not None)
    return full[:limit], len(full)
def call_haiku(client, prompt_text):
    """Send one user message to the Haiku model and summarise the reply.

    Returns a dict with the reported input/output token counts, the
    wall-clock latency in seconds (two decimals), the text of the first
    content block ("" when the reply has no content) and the stop reason.
    """
    started = time.time()
    user_turn = {"role": "user", "content": prompt_text}
    reply = client.messages.create(
        model=HAIKU_MODEL,
        max_tokens=HAIKU_MAX_TOKENS,
        temperature=HAIKU_TEMPERATURE,
        messages=[user_turn],
    )

    summary = {}
    summary["input_tokens"] = reply.usage.input_tokens
    summary["output_tokens"] = reply.usage.output_tokens
    summary["latency_s"] = round(time.time() - started, 2)
    if reply.content:
        summary["response_text"] = reply.content[0].text
    else:
        summary["response_text"] = ""
    summary["stop_reason"] = reply.stop_reason
    return summary
def call_local_metadata(document_text):
    """Ask the local model for document metadata.

    Posts the metadata prompt followed by the document text to OLLAMA_URL.
    Returns ``{"response": <text>, "latency_s": <seconds>}`` on success.
    Any exception is reported instead of raised, as
    ``{"error": <message>, "latency_s": <seconds>}``.
    """
    started = time.time()

    def elapsed():
        return round(time.time() - started, 2)

    try:
        payload = {
            "model": LOCAL_MODEL,
            "prompt": LOCAL_METADATA_PROMPT + document_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 1024, "temperature": 0, "num_ctx": 12288},
        }
        reply = requests.post(OLLAMA_URL, json=payload, timeout=LOCAL_TIMEOUT)
        reply.raise_for_status()
        generated = reply.json().get("response", "")
        return {"response": generated, "latency_s": elapsed()}
    except Exception as exc:
        # Best effort: the caller receives the failure as data.
        return {"error": str(exc), "latency_s": elapsed()}
def parse_graph_full(raw):
    """Parse a model reply into ``(entities, edges, parsed_ok)``.

    The reply is unfenced and decoded as JSON. The parse succeeds only when
    the result is an object whose "entities" and "edges" values are both
    lists; every other case yields ``(None, None, False)``.
    """
    failure = (None, None, False)

    cleaned = strip_json_fences(raw)
    if not cleaned:
        return failure

    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        return failure

    if not isinstance(payload, dict):
        return failure

    entities = payload.get("entities")
    relations = payload.get("edges")
    if not (isinstance(entities, list) and isinstance(relations, list)):
        return failure
    return entities, relations, True
def parse_metadata(raw):
    """Parse the local model's metadata reply into a dict.

    The reply is unfenced and decoded as JSON. Returns the decoded object,
    or ``None`` when the reply is empty, is not valid JSON, or decodes to
    anything other than a JSON object (the metadata schema is an object,
    so a bare list, string or number counts as a parse failure, in line
    with ``parse_graph_full``).
    """
    cleaned = strip_json_fences(raw)
    if not cleaned:
        return None
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    return data if isinstance(data, dict) else None
def graph_metrics(entities, edges):
    """Summarise an extracted graph; returns None if either list is None.

    The result holds: raw entity and edge counts, the number of distinct
    predicates and entity types (compared trimmed and lower-cased), the
    average degree (2 * edges / entities), and the size of the largest
    connected component, both absolute and as a percentage of the entity
    count. Only edges whose subject and object both match a named entity
    take part in the connectivity computation.
    """
    if entities is None or edges is None:
        return None

    def distinct(items, key):
        # Normalised, truthy values of `key` across the dict items.
        found = set()
        for item in items:
            if not isinstance(item, dict):
                continue
            value = item.get(key)
            if value:
                found.add(str(value).strip().lower())
        return found

    node_total = len(entities)
    edge_total = len(edges)
    predicate_labels = distinct(edges, "predicate")
    type_labels = distinct(entities, "type")

    # Undirected adjacency over the named entities.
    neighbours = {name: set() for name in distinct(entities, "name")}
    for edge in edges:
        if isinstance(edge, dict):
            src = str(edge.get("subject", "")).strip().lower()
            dst = str(edge.get("object", "")).strip().lower()
            if src in neighbours and dst in neighbours:
                neighbours[src].add(dst)
                neighbours[dst].add(src)

    # Grow each component outwards one frontier at a time.
    unseen = set(neighbours)
    biggest = 0
    while unseen:
        frontier = {unseen.pop()}
        size = 0
        while frontier:
            size += len(frontier)
            reached = set()
            for node in frontier:
                reached |= neighbours[node]
            reached &= unseen
            unseen -= reached
            frontier = reached
        biggest = max(biggest, size)

    mean_degree = (2 * edge_total / node_total) if node_total > 0 else 0.0
    share = round(100 * biggest / node_total, 1) if node_total else 0.0
    return {
        "n_entities": node_total,
        "n_edges": edge_total,
        "predicate_diversity": len(predicate_labels),
        "type_diversity": len(type_labels),
        "avg_degree": round(mean_degree, 2),
        "largest_component": biggest,
        "largest_component_pct": share,
    }
def stratify(docs):
    """Audit re-run: build the sample from base_class_audit_pack.json.

    Args:
        docs: accepted for interface compatibility but not consulted; the
            sample comes entirely from the audit pack's "pairs" list.

    Returns:
        One ``{"source", "content_length": 0, "status": "SUCCESS"}`` entry
        per audit pair, in file order, or ``[]`` when the audit pack file
        does not exist (an error is printed to stderr in that case).
    """
    audit_file = Path.home() / "aaronai" / "experiments" / "base_class_audit_pack.json"
    if not audit_file.exists():
        # Errors go to stderr, like the other error messages in this script.
        print(f"ERROR: {audit_file} not found", file=sys.stderr)
        return []
    # Read as UTF-8 explicitly so parsing does not depend on the locale.
    audit = json.loads(audit_file.read_text(encoding="utf-8"))
    # Synthesize doc_meta entries for the audit sources
    sample = [
        {"source": pair["source"], "content_length": 0, "status": "SUCCESS"}
        for pair in audit["pairs"]
    ]
    print(f"Audit re-run: {len(sample)} docs from base_class_audit_pack.json")
    return sample
def fmt_metrics(m):
    """Render a graph_metrics() dict as a one-line summary, or "n/a" for None."""
    if m is None:
        return "n/a"
    fields = [
        f"e={m['n_entities']}",
        f"edge={m['n_edges']}",
        f"pred={m['predicate_diversity']}",
        f"type={m['type_diversity']}",
        f"deg={m['avg_degree']}",
        # Largest component shown as a fraction of all entities.
        f"comp={m['largest_component']}/{m['n_entities']}",
    ]
    return " ".join(fields)
def _doc_record(source, size_bucket, original_len, sent_len, truncated,
                a, a_metrics, condition_b, response_cap):
    """Assemble one per-document result row for the output JSON.

    ``a`` is the condition-A call result, or ``{"error": ...}`` when the call
    failed, so every field is read with ``.get``. ``condition_b`` is the
    already-built condition-B sub-record. ``response_cap`` bounds how many
    characters of the raw condition-A response are stored.
    """
    return {
        "source": source,
        "size_bucket": size_bucket,
        "doc_chars_original": original_len,
        "doc_chars_sent": sent_len,
        "truncated": truncated,
        "condition_a": {
            "input_tokens": a.get("input_tokens"),
            "output_tokens": a.get("output_tokens"),
            "latency_s": a.get("latency_s"),
            "metrics": a_metrics,
            "stop_reason": a.get("stop_reason"),
            "response_text": a.get("response_text", "")[:response_cap],
            "error": a.get("error"),
        },
        "condition_b": condition_b,
    }


def _fmt_pct(value):
    """Format a signed percentage delta, or "n/a" when it is undefined."""
    return "n/a" if value is None else f"{value:+.1f}%"


def main():
    """Run conditions A and B over the audit sample and write the summary JSON.

    Steps:
      1. Check ANTHROPIC_API_KEY / PG_DSN and that V2_FILE exists.
      2. For each sampled document: fetch its text, run condition A (plain
         extraction), then condition B (local metadata pass followed by
         extraction with the metadata embedded in the prompt), and record
         both outcomes.
      3. Aggregate token counts, API cost and graph metrics over the pairs
         where both conditions produced a parseable graph, overall and per
         size bucket; write OUTPUT_FILE and print a report.

    Exits with status 1 on missing configuration, a missing V2_FILE, or an
    empty sample (so an empty run never overwrites OUTPUT_FILE).
    """
    # Stored-response cap (characters) for both conditions.
    response_cap = 32000

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    pg_dsn = os.environ.get("PG_DSN")
    if not api_key or not pg_dsn:
        print("ERROR: ANTHROPIC_API_KEY or PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    if not V2_FILE.exists():
        print(f"ERROR: {V2_FILE} not found", file=sys.stderr)
        sys.exit(1)
    with open(V2_FILE, encoding="utf-8") as f:
        v2 = json.load(f)
    docs_meta = [d for d in v2["documents"] if d.get("status") == "SUCCESS"]
    sample = stratify(docs_meta)
    if not sample:
        print("ERROR: sample is empty — nothing to run", file=sys.stderr)
        sys.exit(1)
    # stratify() in this script returns the audit pack, not a size-stratified
    # draw, so the message describes that.
    print(f"Sample: {len(sample)} docs (audit pack, file order)")
    print(f"Mistral context: 12288 tokens, doc cap {MAX_DOC_CHARS} chars")
    print(f"Haiku model: {HAIKU_MODEL} temp={HAIKU_TEMPERATURE}")
    print("Test: base-class metadata as orienting frame, NOT entity drafting")
    print()

    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    try:
        for i, doc_meta in enumerate(sample, 1):
            source = doc_meta["source"]
            doc_text, original_len = fetch_document_text(pg_conn, source)
            if not doc_text:
                print(f"[{i:02d}/{len(sample)}] {source[:55]} — SKIP (not in pgvector)")
                results.append({"source": source, "skipped": "not_in_pgvector"})
                continue
            sent_len = len(doc_text)
            truncated = original_len > sent_len
            size_bucket = (
                "small" if sent_len < 1000
                else "medium" if sent_len < 5000
                else "large"
            )
            trunc_marker = "*" if truncated else " "
            print(f"[{i:02d}/{len(sample)}] [{size_bucket:6s}] [{sent_len:>5}c{trunc_marker}] {source[:55]}", flush=True)

            # Condition A
            try:
                a = call_haiku(client, CONDITION_A_PROMPT + doc_text)
                a_ents, a_edges, a_ok = parse_graph_full(a["response_text"])
                a_metrics = graph_metrics(a_ents, a_edges) if a_ok else None
                print(f" A: in={a['input_tokens']} out={a['output_tokens']} "
                      f"stop={a['stop_reason']} t={a['latency_s']}s", flush=True)
                print(f" {fmt_metrics(a_metrics)}", flush=True)
            except Exception as e:
                # Best-effort: a failed call is recorded and the run continues.
                print(f" A FAILED: {e}", flush=True)
                a = {"error": str(e)}
                a_metrics = None

            # Condition B local metadata pass
            local_result = call_local_metadata(doc_text)
            if "error" in local_result:
                print(f" B local FAILED: {local_result['error']}", flush=True)
                results.append(_doc_record(
                    source, size_bucket, original_len, sent_len, truncated,
                    a, a_metrics,
                    {
                        "skipped": "local_model_failed",
                        "local_error": local_result["error"],
                        "local_latency_s": local_result.get("latency_s"),
                    },
                    response_cap,
                ))
                continue

            local_raw = local_result["response"]
            metadata = parse_metadata(local_raw)
            # Override LLM-hallucinated char_length with Python-computed truth
            if isinstance(metadata, dict):
                metadata["char_length"] = sent_len
            print(f" B local: t={local_result['latency_s']}s metadata_parsed={metadata is not None}",
                  flush=True)
            if metadata is None:
                print(" B: metadata parse failed — skipping API call", flush=True)
                results.append(_doc_record(
                    source, size_bucket, original_len, sent_len, truncated,
                    a, a_metrics,
                    {
                        "skipped": "metadata_parse_failed",
                        "local_latency_s": local_result.get("latency_s"),
                        "local_raw": local_raw[:1000],
                    },
                    response_cap,
                ))
                continue

            metadata_json = json.dumps(metadata, ensure_ascii=False, indent=2)
            # str.replace (not str.format): the prompt contains literal braces.
            b_prompt = CONDITION_B_API_PROMPT.replace("{metadata_json}", metadata_json) + doc_text
            try:
                b = call_haiku(client, b_prompt)
                b_ents, b_edges, b_ok = parse_graph_full(b["response_text"])
                b_metrics = graph_metrics(b_ents, b_edges) if b_ok else None
                print(f" B api: in={b['input_tokens']} out={b['output_tokens']} "
                      f"stop={b['stop_reason']} t={b['latency_s']}s", flush=True)
                print(f" {fmt_metrics(b_metrics)}", flush=True)
            except Exception as e:
                print(f" B api FAILED: {e}", flush=True)
                b = {"error": str(e)}
                b_metrics = None

            # Per-doc deltas (only when both API calls returned usage data)
            if "input_tokens" in a and "input_tokens" in b:
                in_pct = (b["input_tokens"] - a["input_tokens"]) / a["input_tokens"] * 100 if a["input_tokens"] else 0.0
                out_pct = (b["output_tokens"] - a["output_tokens"]) / a["output_tokens"] * 100 if a["output_tokens"] else 0.0
                edge_pct_str = "n/a"
                pred_pct_str = "n/a"
                if a_metrics and b_metrics:
                    if a_metrics["n_edges"] > 0:
                        edge_pct_str = f"{(b_metrics['n_edges'] - a_metrics['n_edges']) / a_metrics['n_edges'] * 100:+.1f}%"
                    if a_metrics["predicate_diversity"] > 0:
                        pred_pct_str = f"{(b_metrics['predicate_diversity'] - a_metrics['predicate_diversity']) / a_metrics['predicate_diversity'] * 100:+.1f}%"
                print(f" Δ in={in_pct:+.1f}% out={out_pct:+.1f}% edges={edge_pct_str} pred={pred_pct_str}",
                      flush=True)

            results.append(_doc_record(
                source, size_bucket, original_len, sent_len, truncated,
                a, a_metrics,
                {
                    "local_latency_s": local_result.get("latency_s"),
                    "local_metadata": metadata,
                    "local_raw": local_raw[:1000],
                    "api_input_tokens": b.get("input_tokens"),
                    "api_output_tokens": b.get("output_tokens"),
                    "api_latency_s": b.get("latency_s"),
                    "metrics": b_metrics,
                    "stop_reason": b.get("stop_reason"),
                    "response_text": b.get("response_text", "")[:response_cap],
                    "error": b.get("error"),
                },
                response_cap,
            ))
    finally:
        # Release the connection even when a document raises mid-run.
        pg_conn.close()

    total_elapsed = round(time.time() - t_total, 1)

    # A pair is valid only when both conditions produced a parseable graph.
    valid = [r for r in results
             if r.get("condition_a", {}).get("metrics") is not None
             and r.get("condition_b", {}).get("metrics") is not None]
    a_in = sum(r["condition_a"]["input_tokens"] for r in valid)
    a_out = sum(r["condition_a"]["output_tokens"] for r in valid)
    b_in = sum(r["condition_b"]["api_input_tokens"] for r in valid)
    b_out = sum(r["condition_b"]["api_output_tokens"] for r in valid)
    a_cost = (a_in * HAIKU_IN_PER_M + a_out * HAIKU_OUT_PER_M) / 1_000_000
    b_cost = (b_in * HAIKU_IN_PER_M + b_out * HAIKU_OUT_PER_M) / 1_000_000

    def avg_metric(rows, condition, key):
        vals = [r[condition]["metrics"][key] for r in rows if r[condition]["metrics"]]
        return round(statistics.mean(vals), 2) if vals else None

    by_bucket = {}
    for bucket in ("small", "medium", "large"):
        rows = [r for r in valid if r["size_bucket"] == bucket]
        if not rows:
            by_bucket[bucket] = None
            continue
        ai = sum(r["condition_a"]["input_tokens"] for r in rows)
        ao = sum(r["condition_a"]["output_tokens"] for r in rows)
        bi = sum(r["condition_b"]["api_input_tokens"] for r in rows)
        bo = sum(r["condition_b"]["api_output_tokens"] for r in rows)
        by_bucket[bucket] = {
            "n": len(rows),
            "input_delta_pct": round((bi - ai) / ai * 100, 2) if ai else None,
            "output_delta_pct": round((bo - ao) / ao * 100, 2) if ao else None,
            "a_avg_entities": avg_metric(rows, "condition_a", "n_entities"),
            "b_avg_entities": avg_metric(rows, "condition_b", "n_entities"),
            "a_avg_edges": avg_metric(rows, "condition_a", "n_edges"),
            "b_avg_edges": avg_metric(rows, "condition_b", "n_edges"),
            "a_avg_predicate_diversity": avg_metric(rows, "condition_a", "predicate_diversity"),
            "b_avg_predicate_diversity": avg_metric(rows, "condition_b", "predicate_diversity"),
            "a_avg_type_diversity": avg_metric(rows, "condition_a", "type_diversity"),
            "b_avg_type_diversity": avg_metric(rows, "condition_b", "type_diversity"),
            "a_avg_degree": avg_metric(rows, "condition_a", "avg_degree"),
            "b_avg_degree": avg_metric(rows, "condition_b", "avg_degree"),
            "a_avg_largest_component_pct": avg_metric(rows, "condition_a", "largest_component_pct"),
            "b_avg_largest_component_pct": avg_metric(rows, "condition_b", "largest_component_pct"),
        }

    summary = {
        "experiment": "base_class_test",
        "title": "Base-Class Enrichment — OOP Framing",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "haiku_model": HAIKU_MODEL,
        "local_model": LOCAL_MODEL,
        "max_doc_chars": MAX_DOC_CHARS,
        "n_documents": len(sample),
        "n_valid_pairs": len(valid),
        "total_elapsed_s": total_elapsed,
        "totals": {
            "a_input_tokens": a_in,
            "a_output_tokens": a_out,
            "b_input_tokens": b_in,
            "b_output_tokens": b_out,
            "a_cost_usd": round(a_cost, 4),
            "b_cost_usd": round(b_cost, 4),
            "cost_delta_usd": round(b_cost - a_cost, 4),
            "cost_delta_pct": round((b_cost - a_cost) / a_cost * 100, 2) if a_cost else None,
            "note": "API cost only — local Mistral runtime on VPS not monetized",
        },
        "by_size_bucket": by_bucket,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(sample)} valid pairs in {total_elapsed}s")
    print(f"A total cost: ${a_cost:.4f} (in={a_in} out={a_out})")
    print(f"B total cost: ${b_cost:.4f} (in={b_in} out={b_out})")
    delta_pct = summary['totals']['cost_delta_pct']
    if delta_pct is not None:
        verdict = "B cheaper" if delta_pct < 0 else "B more expensive"
        print(f"Cost delta: {delta_pct:+.2f}% ({verdict})")
    print()
    print("By bucket — graph metrics (A vs B):")
    for bucket, stats in by_bucket.items():
        if stats:
            print(f" {bucket:6s} (n={stats['n']}):")
            # Deltas may be None (zero baseline); _fmt_pct avoids a TypeError.
            print(f" cost: in {_fmt_pct(stats['input_delta_pct'])} out {_fmt_pct(stats['output_delta_pct'])}")
            print(f" entities: A={stats['a_avg_entities']} B={stats['b_avg_entities']}")
            print(f" edges: A={stats['a_avg_edges']} B={stats['b_avg_edges']}")
            print(f" predicate diversity: A={stats['a_avg_predicate_diversity']} B={stats['b_avg_predicate_diversity']}")
            print(f" type diversity: A={stats['a_avg_type_diversity']} B={stats['b_avg_type_diversity']}")
            print(f" avg degree: A={stats['a_avg_degree']} B={stats['b_avg_degree']}")
            print(f" largest component %: A={stats['a_avg_largest_component_pct']} B={stats['b_avg_largest_component_pct']}")
    print()
    print(f"Results: {OUTPUT_FILE}")
# Script entry point: run the experiment only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
+593
View File
@@ -0,0 +1,593 @@
#!/usr/bin/env python3
"""
Base-Class Enrichment Test — OOP Framing Experiment
Tests whether non-entity metadata from a local model (domain class, structural
signals, presence flags, length, summary) can take load off the API without
constraining what it extracts.
The local model does NOT draft entities. The API still does full extraction.
The local model produces metadata that orients the API's reading.
Conditions:
A — Baseline: single Claude Haiku call, full extraction, no metadata
B — Base-class: Mistral metadata + Haiku full extraction with metadata as frame
Critical test: B's edge count and predicate diversity must be ≥A's, or close.
If B produces fewer edges or less predicate diversity, metadata is acting as
constraint and the OOP framing is falsified.
Sample: 20 docs from briefing_test_v2_results.json:
- 5 small (<1000 chars)
- 10 medium (1000-5000 chars)
- 5 large (5000-12000 chars, capped at 12K)
Outputs: ~/aaronai/experiments/base_class_test_results.json
"""
import json
import os
import re
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
import requests
from dotenv import load_dotenv
# Load the project .env so os.environ lookups in main() can see its values.
load_dotenv(Path.home() / "aaronai" / ".env")
# Input: JSON file whose "documents" list main() filters and samples from.
V2_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# Output: summary JSON written by main() (parent directory created on demand).
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "base_class_test_results.json"
# Settings passed to the Anthropic messages call in call_haiku().
HAIKU_MODEL = "claude-haiku-4-5-20251001"
HAIKU_MAX_TOKENS = 4096
HAIKU_TEMPERATURE = 0.0
# Local model endpoint and settings used by call_local_metadata().
OLLAMA_URL = "http://localhost:11434/api/generate"
LOCAL_MODEL = "mistral"
LOCAL_TIMEOUT = 180  # passed as the requests timeout
# fetch_document_text() truncates the joined document to this many characters.
MAX_DOC_CHARS = 12000
# Prices used in main()'s cost estimate: tokens * rate / 1_000_000 -> USD.
HAIKU_IN_PER_M = 1.0
HAIKU_OUT_PER_M = 5.0
# Condition A (baseline) prompt. main() appends the document text directly
# after the trailing "DOCUMENT:" line; no metadata is included.
CONDITION_A_PROMPT = """Extract a knowledge graph from the document below.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits the entity. Do not constrain yourself to a fixed list.
Edge predicates: natural language phrases that capture the actual relationship the document states or implies.
Extract every entity and every relationship the document states or strongly implies. Both subject and object in every edge must appear in entities. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
# Prompt for the local metadata pass. call_local_metadata() appends the
# document text after "DOCUMENT:" and requests JSON-formatted output.
# The schema asks for surface/structural features only, not entities.
LOCAL_METADATA_PROMPT = """Analyze the document below and produce metadata describing its surface features. Do NOT extract entities. Do NOT identify content. Only produce structural and surface-level metadata.
Return ONLY valid JSON with this exact schema:
{
"language": "en or other",
"char_length": integer,
"primary_format": "prose, presentation, list, form, code, or mixed",
"structural_signals": {
"has_headings": boolean,
"has_bullet_lists": boolean,
"has_numbered_lists": boolean,
"has_tables": boolean,
"has_code_blocks": boolean,
"has_dates": boolean
},
"content_signals": {
"has_named_people": boolean,
"has_institutional_language": boolean,
"has_technical_terminology": boolean,
"has_first_person": boolean,
"has_quotations": boolean
},
"domain_class": "technical, administrative, personal, educational, creative, reference, or mixed",
"one_sentence_summary": "string of 25 words or fewer describing what the document is about"
}
JSON only, no commentary.
DOCUMENT:
"""
# Condition B prompt. main() fills the {metadata_json} placeholder with
# str.replace (not str.format), so the literal braces of the JSON schema
# below need no escaping; the document text is appended after "DOCUMENT:".
CONDITION_B_API_PROMPT = """You are extracting a knowledge graph from a document. The document has been pre-analyzed by a local model and the following metadata is provided as orienting context — not as constraint. Extract every entity and every relationship in the document. Do not limit your extraction to what the metadata suggests; the metadata is here to orient your reading, not to bound it.
DOCUMENT METADATA:
{metadata_json}
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits. Edge predicates: natural language phrases capturing the actual relationship. Both subject and object in every edge must appear in entities. Extract every entity and every relationship the document states or strongly implies. Do not filter for salience. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
def strip_json_fences(text):
    """Remove a surrounding Markdown code fence from a model response.

    Returns "" for empty/None input; otherwise the trimmed text with a
    leading ``` or ```json marker and a trailing ``` marker removed.
    """
    if not text:
        return ""
    body = text.strip()
    # Opening fence first, then the closing fence.
    for pattern in (r"^```(?:json)?\s*", r"\s*```$"):
        body = re.sub(pattern, "", body)
    return body.strip()
def fetch_document_text(pg_conn, source):
    """Fetch and reassemble a document from the ``embeddings`` table.

    Args:
        pg_conn: open database connection providing ``cursor()``.
        source: value matched against the ``source`` column.

    Returns:
        ``(text, original_len)``: the rows' ``document`` values joined with
        blank lines in ``id`` order and truncated to MAX_DOC_CHARS, plus the
        length before truncation. ``(None, 0)`` when no rows match.
    """
    # The context manager closes the cursor even if execute/fetchall raises.
    with pg_conn.cursor() as cur:
        cur.execute(
            "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
            (source,),
        )
        rows = cur.fetchall()
    if not rows:
        return None, 0
    full = "\n\n".join(r[0] for r in rows)
    return full[:MAX_DOC_CHARS], len(full)
def call_haiku(client, prompt_text):
    """Send one user message to the Haiku model and summarise the reply.

    Returns a dict with the reported input/output token counts, wall-clock
    latency in seconds (2 decimals), the text of the first content block
    ("" when the reply has no content) and the stop reason.
    """
    started = time.time()
    request = {
        "model": HAIKU_MODEL,
        "max_tokens": HAIKU_MAX_TOKENS,
        "temperature": HAIKU_TEMPERATURE,
        "messages": [{"role": "user", "content": prompt_text}],
    }
    reply = client.messages.create(**request)
    tokens_in = reply.usage.input_tokens
    tokens_out = reply.usage.output_tokens
    elapsed = round(time.time() - started, 2)
    if reply.content:
        body = reply.content[0].text
    else:
        body = ""
    return {
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
        "latency_s": elapsed,
        "response_text": body,
        "stop_reason": reply.stop_reason,
    }
def call_local_metadata(document_text):
    """Ask the local model for surface metadata about ``document_text``.

    Returns ``{"response", "latency_s"}`` on success. Any exception is
    caught and reported as ``{"error", "latency_s"}`` instead of being
    raised, so the caller can record the failure and carry on.
    """
    started = time.time()

    def elapsed():
        return round(time.time() - started, 2)

    try:
        payload = {
            "model": LOCAL_MODEL,
            "prompt": LOCAL_METADATA_PROMPT + document_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 1024, "temperature": 0, "num_ctx": 12288},
        }
        reply = requests.post(OLLAMA_URL, json=payload, timeout=LOCAL_TIMEOUT)
        reply.raise_for_status()
        generated = reply.json().get("response", "")
        return {"response": generated, "latency_s": elapsed()}
    except Exception as exc:
        return {"error": str(exc), "latency_s": elapsed()}
def parse_graph_full(raw):
    """Parse a model response into ``(entities, edges, parsed_ok)``.

    Succeeds only when the fence-stripped text is a JSON object whose
    "entities" and "edges" values are both lists; every other case yields
    ``(None, None, False)``.
    """
    failure = (None, None, False)
    text = strip_json_fences(raw)
    if not text:
        return failure
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        return failure
    if not isinstance(payload, dict):
        return failure
    entity_list = payload.get("entities")
    edge_list = payload.get("edges")
    if not (isinstance(entity_list, list) and isinstance(edge_list, list)):
        return failure
    return entity_list, edge_list, True
def parse_metadata(raw):
    """Parse the local model's metadata response into a dict.

    Returns:
        The decoded JSON object, or None when the response is empty, is not
        valid JSON, or decodes to something other than a JSON object.
    """
    cleaned = strip_json_fences(raw)
    if not cleaned:
        return None
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    # The requested schema is a JSON object; a bare list/string/number is a
    # failed parse, mirroring the dict check in parse_graph_full().
    return data if isinstance(data, dict) else None
def graph_metrics(entities, edges):
    """Summarise the size, variety and connectivity of an extracted graph.

    Args:
        entities: list of entity records (dicts with "name"/"type"), or None.
        edges: list of edge records (dicts with "subject"/"predicate"/
            "object"), or None.

    Returns:
        None when either input is None; otherwise a dict with the keys
        n_entities, n_edges, predicate_diversity, type_diversity,
        avg_degree, largest_component and largest_component_pct.

    Labels are compared after ``str(...).strip().lower()``. Counts
    (n_entities, n_edges, avg_degree and the percentage denominator) use the
    raw list lengths, while connectivity only considers dict records whose
    endpoints both match a named entity.
    """
    if entities is None or edges is None:
        return None

    def norm(value):
        # Single normalisation rule shared by names, types and predicates.
        return str(value).strip().lower()

    entity_count = len(entities)
    edge_count = len(edges)
    entity_dicts = [item for item in entities if isinstance(item, dict)]
    edge_dicts = [item for item in edges if isinstance(item, dict)]

    # Distinct predicate / entity-type labels (falsy labels are ignored).
    predicate_labels = {
        norm(edge.get("predicate")) for edge in edge_dicts if edge.get("predicate")
    }
    type_labels = {
        norm(ent.get("type")) for ent in entity_dicts if ent.get("type")
    }

    # Undirected adjacency over named entities; an edge is linked only when
    # both of its endpoints are known entity names.
    neighbours = {
        norm(ent.get("name")): set() for ent in entity_dicts if ent.get("name")
    }
    for edge in edge_dicts:
        src = norm(edge.get("subject", ""))
        dst = norm(edge.get("object", ""))
        if src in neighbours and dst in neighbours:
            neighbours[src].add(dst)
            neighbours[dst].add(src)

    # Stack-based traversal from every not-yet-seen node; keep the size of
    # the biggest connected component.
    seen = set()
    biggest = 0
    for root in neighbours:
        if root in seen:
            continue
        seen.add(root)
        frontier = [root]
        size = 0
        while frontier:
            current = frontier.pop()
            size += 1
            for nxt in neighbours[current] - seen:
                seen.add(nxt)
                frontier.append(nxt)
        biggest = max(biggest, size)

    # Each edge touches two nodes, hence the factor of two.
    mean_degree = (2 * edge_count / entity_count) if entity_count > 0 else 0.0
    component_pct = round(100 * biggest / entity_count, 1) if entity_count else 0.0
    return {
        "n_entities": entity_count,
        "n_edges": edge_count,
        "predicate_diversity": len(predicate_labels),
        "type_diversity": len(type_labels),
        "avg_degree": round(mean_degree, 2),
        "largest_component": biggest,
        "largest_component_pct": component_pct,
    }
def stratify(docs):
    """Pick a size-stratified sample, preserving input order within buckets.

    Documents are bucketed by their "content_length": small (<1000),
    medium (1000-4999) and large (>=5000). The result is the first 5 small,
    then the first 10 medium, then the first 5 large documents.
    """
    buckets = {"small": [], "medium": [], "large": []}
    for doc in docs:
        length = doc["content_length"]
        if length < 1000:
            buckets["small"].append(doc)
        elif length < 5000:
            buckets["medium"].append(doc)
        else:
            buckets["large"].append(doc)
    return buckets["small"][:5] + buckets["medium"][:10] + buckets["large"][:5]
def fmt_metrics(m):
    """Render a graph_metrics() dict as a one-line summary, or "n/a" for None."""
    if m is None:
        return "n/a"
    fields = [
        f"e={m['n_entities']}",
        f"edge={m['n_edges']}",
        f"pred={m['predicate_diversity']}",
        f"type={m['type_diversity']}",
        f"deg={m['avg_degree']}",
        # Largest component shown as a fraction of all entities.
        f"comp={m['largest_component']}/{m['n_entities']}",
    ]
    return " ".join(fields)
def _doc_record(source, size_bucket, original_len, sent_len, truncated,
                a, a_metrics, condition_b, response_cap):
    """Assemble one per-document result row for the output JSON.

    ``a`` is the condition-A call result, or ``{"error": ...}`` when the call
    failed, so every field is read with ``.get``. ``condition_b`` is the
    already-built condition-B sub-record. ``response_cap`` bounds how many
    characters of the raw condition-A response are stored.
    """
    return {
        "source": source,
        "size_bucket": size_bucket,
        "doc_chars_original": original_len,
        "doc_chars_sent": sent_len,
        "truncated": truncated,
        "condition_a": {
            "input_tokens": a.get("input_tokens"),
            "output_tokens": a.get("output_tokens"),
            "latency_s": a.get("latency_s"),
            "metrics": a_metrics,
            "stop_reason": a.get("stop_reason"),
            "response_text": a.get("response_text", "")[:response_cap],
            "error": a.get("error"),
        },
        "condition_b": condition_b,
    }


def _fmt_pct(value):
    """Format a signed percentage delta, or "n/a" when it is undefined."""
    return "n/a" if value is None else f"{value:+.1f}%"


def main():
    """Run conditions A and B over the stratified sample and write the summary.

    Steps:
      1. Check ANTHROPIC_API_KEY / PG_DSN and that V2_FILE exists.
      2. For each sampled document: fetch its text, run condition A (plain
         extraction), then condition B (local metadata pass followed by
         extraction with the metadata embedded in the prompt), and record
         both outcomes.
      3. Aggregate token counts, API cost and graph metrics over the pairs
         where both conditions produced a parseable graph, overall and per
         size bucket; write OUTPUT_FILE and print a report.

    Exits with status 1 on missing configuration, a missing V2_FILE, or an
    empty sample (so an empty run never overwrites OUTPUT_FILE).
    """
    # Stored-response cap (characters) for both conditions.
    response_cap = 4000

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    pg_dsn = os.environ.get("PG_DSN")
    if not api_key or not pg_dsn:
        print("ERROR: ANTHROPIC_API_KEY or PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    if not V2_FILE.exists():
        print(f"ERROR: {V2_FILE} not found", file=sys.stderr)
        sys.exit(1)
    with open(V2_FILE, encoding="utf-8") as f:
        v2 = json.load(f)
    docs_meta = [d for d in v2["documents"] if d.get("status") == "SUCCESS"]
    sample = stratify(docs_meta)
    if not sample:
        print("ERROR: sample is empty — nothing to run", file=sys.stderr)
        sys.exit(1)
    print(f"Sample: {len(sample)} docs (5s/10m/5l, file order)")
    print(f"Mistral context: 12288 tokens, doc cap {MAX_DOC_CHARS} chars")
    print(f"Haiku model: {HAIKU_MODEL} temp={HAIKU_TEMPERATURE}")
    print("Test: base-class metadata as orienting frame, NOT entity drafting")
    print()

    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    try:
        for i, doc_meta in enumerate(sample, 1):
            source = doc_meta["source"]
            doc_text, original_len = fetch_document_text(pg_conn, source)
            if not doc_text:
                print(f"[{i:02d}/{len(sample)}] {source[:55]} — SKIP (not in pgvector)")
                results.append({"source": source, "skipped": "not_in_pgvector"})
                continue
            sent_len = len(doc_text)
            truncated = original_len > sent_len
            # Bucket by the length actually sent, not the stored length.
            size_bucket = (
                "small" if sent_len < 1000
                else "medium" if sent_len < 5000
                else "large"
            )
            trunc_marker = "*" if truncated else " "
            print(f"[{i:02d}/{len(sample)}] [{size_bucket:6s}] [{sent_len:>5}c{trunc_marker}] {source[:55]}", flush=True)

            # Condition A
            try:
                a = call_haiku(client, CONDITION_A_PROMPT + doc_text)
                a_ents, a_edges, a_ok = parse_graph_full(a["response_text"])
                a_metrics = graph_metrics(a_ents, a_edges) if a_ok else None
                print(f" A: in={a['input_tokens']} out={a['output_tokens']} "
                      f"stop={a['stop_reason']} t={a['latency_s']}s", flush=True)
                print(f" {fmt_metrics(a_metrics)}", flush=True)
            except Exception as e:
                # Best-effort: a failed call is recorded and the run continues.
                print(f" A FAILED: {e}", flush=True)
                a = {"error": str(e)}
                a_metrics = None

            # Condition B local metadata pass
            local_result = call_local_metadata(doc_text)
            if "error" in local_result:
                print(f" B local FAILED: {local_result['error']}", flush=True)
                results.append(_doc_record(
                    source, size_bucket, original_len, sent_len, truncated,
                    a, a_metrics,
                    {
                        "skipped": "local_model_failed",
                        "local_error": local_result["error"],
                        "local_latency_s": local_result.get("latency_s"),
                    },
                    response_cap,
                ))
                continue

            local_raw = local_result["response"]
            metadata = parse_metadata(local_raw)
            print(f" B local: t={local_result['latency_s']}s metadata_parsed={metadata is not None}",
                  flush=True)
            if metadata is None:
                print(" B: metadata parse failed — skipping API call", flush=True)
                results.append(_doc_record(
                    source, size_bucket, original_len, sent_len, truncated,
                    a, a_metrics,
                    {
                        "skipped": "metadata_parse_failed",
                        "local_latency_s": local_result.get("latency_s"),
                        "local_raw": local_raw[:1000],
                    },
                    response_cap,
                ))
                continue

            metadata_json = json.dumps(metadata, ensure_ascii=False, indent=2)
            # str.replace (not str.format): the prompt contains literal braces.
            b_prompt = CONDITION_B_API_PROMPT.replace("{metadata_json}", metadata_json) + doc_text
            try:
                b = call_haiku(client, b_prompt)
                b_ents, b_edges, b_ok = parse_graph_full(b["response_text"])
                b_metrics = graph_metrics(b_ents, b_edges) if b_ok else None
                print(f" B api: in={b['input_tokens']} out={b['output_tokens']} "
                      f"stop={b['stop_reason']} t={b['latency_s']}s", flush=True)
                print(f" {fmt_metrics(b_metrics)}", flush=True)
            except Exception as e:
                print(f" B api FAILED: {e}", flush=True)
                b = {"error": str(e)}
                b_metrics = None

            # Per-doc deltas (only when both API calls returned usage data)
            if "input_tokens" in a and "input_tokens" in b:
                in_pct = (b["input_tokens"] - a["input_tokens"]) / a["input_tokens"] * 100 if a["input_tokens"] else 0.0
                out_pct = (b["output_tokens"] - a["output_tokens"]) / a["output_tokens"] * 100 if a["output_tokens"] else 0.0
                edge_pct_str = "n/a"
                pred_pct_str = "n/a"
                if a_metrics and b_metrics:
                    if a_metrics["n_edges"] > 0:
                        edge_pct_str = f"{(b_metrics['n_edges'] - a_metrics['n_edges']) / a_metrics['n_edges'] * 100:+.1f}%"
                    if a_metrics["predicate_diversity"] > 0:
                        pred_pct_str = f"{(b_metrics['predicate_diversity'] - a_metrics['predicate_diversity']) / a_metrics['predicate_diversity'] * 100:+.1f}%"
                print(f" Δ in={in_pct:+.1f}% out={out_pct:+.1f}% edges={edge_pct_str} pred={pred_pct_str}",
                      flush=True)

            results.append(_doc_record(
                source, size_bucket, original_len, sent_len, truncated,
                a, a_metrics,
                {
                    "local_latency_s": local_result.get("latency_s"),
                    "local_metadata": metadata,
                    "local_raw": local_raw[:1000],
                    "api_input_tokens": b.get("input_tokens"),
                    "api_output_tokens": b.get("output_tokens"),
                    "api_latency_s": b.get("latency_s"),
                    "metrics": b_metrics,
                    "stop_reason": b.get("stop_reason"),
                    "response_text": b.get("response_text", "")[:response_cap],
                    "error": b.get("error"),
                },
                response_cap,
            ))
    finally:
        # Release the connection even when a document raises mid-run.
        pg_conn.close()

    total_elapsed = round(time.time() - t_total, 1)

    # A pair is valid only when both conditions produced a parseable graph.
    valid = [r for r in results
             if r.get("condition_a", {}).get("metrics") is not None
             and r.get("condition_b", {}).get("metrics") is not None]
    a_in = sum(r["condition_a"]["input_tokens"] for r in valid)
    a_out = sum(r["condition_a"]["output_tokens"] for r in valid)
    b_in = sum(r["condition_b"]["api_input_tokens"] for r in valid)
    b_out = sum(r["condition_b"]["api_output_tokens"] for r in valid)
    a_cost = (a_in * HAIKU_IN_PER_M + a_out * HAIKU_OUT_PER_M) / 1_000_000
    b_cost = (b_in * HAIKU_IN_PER_M + b_out * HAIKU_OUT_PER_M) / 1_000_000

    def avg_metric(rows, condition, key):
        vals = [r[condition]["metrics"][key] for r in rows if r[condition]["metrics"]]
        return round(statistics.mean(vals), 2) if vals else None

    by_bucket = {}
    for bucket in ("small", "medium", "large"):
        rows = [r for r in valid if r["size_bucket"] == bucket]
        if not rows:
            by_bucket[bucket] = None
            continue
        ai = sum(r["condition_a"]["input_tokens"] for r in rows)
        ao = sum(r["condition_a"]["output_tokens"] for r in rows)
        bi = sum(r["condition_b"]["api_input_tokens"] for r in rows)
        bo = sum(r["condition_b"]["api_output_tokens"] for r in rows)
        by_bucket[bucket] = {
            "n": len(rows),
            "input_delta_pct": round((bi - ai) / ai * 100, 2) if ai else None,
            "output_delta_pct": round((bo - ao) / ao * 100, 2) if ao else None,
            "a_avg_entities": avg_metric(rows, "condition_a", "n_entities"),
            "b_avg_entities": avg_metric(rows, "condition_b", "n_entities"),
            "a_avg_edges": avg_metric(rows, "condition_a", "n_edges"),
            "b_avg_edges": avg_metric(rows, "condition_b", "n_edges"),
            "a_avg_predicate_diversity": avg_metric(rows, "condition_a", "predicate_diversity"),
            "b_avg_predicate_diversity": avg_metric(rows, "condition_b", "predicate_diversity"),
            "a_avg_type_diversity": avg_metric(rows, "condition_a", "type_diversity"),
            "b_avg_type_diversity": avg_metric(rows, "condition_b", "type_diversity"),
            "a_avg_degree": avg_metric(rows, "condition_a", "avg_degree"),
            "b_avg_degree": avg_metric(rows, "condition_b", "avg_degree"),
            "a_avg_largest_component_pct": avg_metric(rows, "condition_a", "largest_component_pct"),
            "b_avg_largest_component_pct": avg_metric(rows, "condition_b", "largest_component_pct"),
        }

    summary = {
        "experiment": "base_class_test",
        "title": "Base-Class Enrichment — OOP Framing",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "haiku_model": HAIKU_MODEL,
        "local_model": LOCAL_MODEL,
        "max_doc_chars": MAX_DOC_CHARS,
        "n_documents": len(sample),
        "n_valid_pairs": len(valid),
        "total_elapsed_s": total_elapsed,
        "totals": {
            "a_input_tokens": a_in,
            "a_output_tokens": a_out,
            "b_input_tokens": b_in,
            "b_output_tokens": b_out,
            "a_cost_usd": round(a_cost, 4),
            "b_cost_usd": round(b_cost, 4),
            "cost_delta_usd": round(b_cost - a_cost, 4),
            "cost_delta_pct": round((b_cost - a_cost) / a_cost * 100, 2) if a_cost else None,
            "note": "API cost only — local Mistral runtime on VPS not monetized",
        },
        "by_size_bucket": by_bucket,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(sample)} valid pairs in {total_elapsed}s")
    print(f"A total cost: ${a_cost:.4f} (in={a_in} out={a_out})")
    print(f"B total cost: ${b_cost:.4f} (in={b_in} out={b_out})")
    delta_pct = summary['totals']['cost_delta_pct']
    if delta_pct is not None:
        verdict = "B cheaper" if delta_pct < 0 else "B more expensive"
        print(f"Cost delta: {delta_pct:+.2f}% ({verdict})")
    print()
    print("By bucket — graph metrics (A vs B):")
    for bucket, stats in by_bucket.items():
        if stats:
            print(f" {bucket:6s} (n={stats['n']}):")
            # Deltas may be None (zero baseline); _fmt_pct avoids a TypeError.
            print(f" cost: in {_fmt_pct(stats['input_delta_pct'])} out {_fmt_pct(stats['output_delta_pct'])}")
            print(f" entities: A={stats['a_avg_entities']} B={stats['b_avg_entities']}")
            print(f" edges: A={stats['a_avg_edges']} B={stats['b_avg_edges']}")
            print(f" predicate diversity: A={stats['a_avg_predicate_diversity']} B={stats['b_avg_predicate_diversity']}")
            print(f" type diversity: A={stats['a_avg_type_diversity']} B={stats['b_avg_type_diversity']}")
            print(f" avg degree: A={stats['a_avg_degree']} B={stats['b_avg_degree']}")
            print(f" largest component %: A={stats['a_avg_largest_component_pct']} B={stats['b_avg_largest_component_pct']}")
    print()
    print(f"Results: {OUTPUT_FILE}")
# Script entry point: run the experiment only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
+611
View File
@@ -0,0 +1,611 @@
#!/usr/bin/env python3
"""
Base-Class Enrichment Test — OOP Framing Experiment
Tests whether non-entity metadata from a local model (domain class, structural
signals, presence flags, length, summary) can take load off the API without
constraining what it extracts.
The local model does NOT draft entities. The API still does full extraction.
The local model produces metadata that orients the API's reading.
Conditions:
A — Baseline: single Claude Haiku call, full extraction, no metadata
B — Base-class: Mistral metadata + Haiku full extraction with metadata as frame
Critical test: B's edge count and predicate diversity must be ≥A's, or close.
If B produces fewer edges or less predicate diversity, metadata is acting as
constraint and the OOP framing is falsified.
Sample: 50 docs from briefing_test_v2_results.json:
- 15 small (<1000 chars)
- 25 medium (1000-5000 chars)
- 10 large (5000-12000 chars, capped at 12K)
Outputs: ~/aaronai/experiments/base_class_validation_results.json
"""
import json
import os
import re
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
import requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
# --- Input / output locations -------------------------------------------
# Results of the earlier briefing run; its SUCCESS documents seed the sample.
V2_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# Where main() writes the summary plus per-document results.
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "base_class_validation_results.json"

# --- Anthropic API settings (passed to client.messages.create) -----------
HAIKU_MODEL = "claude-haiku-4-5-20251001"
HAIKU_MAX_TOKENS = 8192
HAIKU_TEMPERATURE = 0.0

# --- Local model settings (Ollama generate endpoint) ---------------------
OLLAMA_URL = "http://localhost:11434/api/generate"
LOCAL_MODEL = "mistral"
LOCAL_TIMEOUT = 180  # passed as the `timeout` argument of requests.post
# Documents are truncated to this many characters before either condition runs.
MAX_DOC_CHARS = 12000

# Prices used for the cost summary: token counts are multiplied by these and
# divided by 1_000_000, and the results are stored under *_cost_usd keys.
HAIKU_IN_PER_M = 1.0
HAIKU_OUT_PER_M = 5.0

# Condition A (baseline) prompt: the document text is appended directly after it.
CONDITION_A_PROMPT = """Extract a knowledge graph from the document below.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits the entity. Do not constrain yourself to a fixed list.
Edge predicates: natural language phrases that capture the actual relationship the document states or implies.
Extract every entity and every relationship the document states or strongly implies. Both subject and object in every edge must appear in entities. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""

# Prompt for the local model's metadata pass (condition B, step 1). The
# document text is appended after it. The model-reported "char_length" is
# later overwritten in main() with len(doc_text).
LOCAL_METADATA_PROMPT = """Analyze the document below and produce metadata describing its surface features. Do NOT extract entities. Do NOT identify content. Only produce structural and surface-level metadata.
Return ONLY valid JSON with this exact schema:
{
"language": "en or other",
"char_length": integer,
"primary_format": "prose, presentation, list, form, code, or mixed",
"structural_signals": {
"has_headings": boolean,
"has_bullet_lists": boolean,
"has_numbered_lists": boolean,
"has_tables": boolean,
"has_code_blocks": boolean,
"has_dates": boolean
},
"content_signals": {
"has_named_people": boolean,
"has_institutional_language": boolean,
"has_technical_terminology": boolean,
"has_first_person": boolean,
"has_quotations": boolean
},
"domain_class": "technical, administrative, personal, educational, creative, reference, or mixed",
"one_sentence_summary": "string of 25 words or fewer describing what the document is about"
}
JSON only, no commentary.
DOCUMENT:
"""

# Condition B (step 2) prompt. main() fills the literal "{metadata_json}"
# placeholder with str.replace (not str.format, so the JSON braces below are
# safe) and then appends the document text.
CONDITION_B_API_PROMPT = """You are extracting a knowledge graph from a document. The document has been pre-analyzed by a local model and the following metadata is provided as orienting context — not as constraint. Extract every entity and every relationship in the document. Do not limit your extraction to what the metadata suggests; the metadata is here to orient your reading, not to bound it.
DOCUMENT METADATA:
{metadata_json}
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits. Edge predicates: natural language phrases capturing the actual relationship. Both subject and object in every edge must appear in entities. Extract every entity and every relationship the document states or strongly implies. Do not filter for salience. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
def strip_json_fences(text):
    """Return *text* without a surrounding Markdown code fence.

    A leading ``` or ```json marker and a trailing ``` marker are removed,
    along with the whitespace around them. Falsy input (None, "") yields "".
    """
    if not text:
        return ""
    body = text.strip()
    # Opening fence first, then closing fence; each pattern is anchored to
    # one end of the string so fences inside the payload are left alone.
    for fence_pattern in (r"^```(?:json)?\s*", r"\s*```$"):
        body = re.sub(fence_pattern, "", body)
    return body.strip()
def fetch_document_text(pg_conn, source):
    """Load every stored chunk for *source* and return it as one string.

    Args:
        pg_conn: open DB-API connection (psycopg2 in this script).
        source: value matched against the ``source`` column of ``embeddings``.

    Returns:
        ``(text, original_length)`` where ``text`` is the chunks joined with
        blank lines and truncated to ``MAX_DOC_CHARS``, and ``original_length``
        is the length before truncation. ``(None, 0)`` when no rows match.
    """
    cur = pg_conn.cursor()
    try:
        cur.execute(
            "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
            (source,),
        )
        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails, so an error on one
        # document does not leave a dangling cursor on the shared connection.
        cur.close()
    if not rows:
        return None, 0
    full = "\n\n".join(r[0] for r in rows)
    return full[:MAX_DOC_CHARS], len(full)
def call_haiku(client, prompt_text):
    """Send *prompt_text* as a single user message and summarize the reply.

    Returns a dict with the token usage, wall-clock latency in seconds
    (rounded to 2 decimals), the text of the first content block ("" when the
    response has no content), and the stop reason.
    """
    started = time.time()
    reply = client.messages.create(
        model=HAIKU_MODEL,
        max_tokens=HAIKU_MAX_TOKENS,
        temperature=HAIKU_TEMPERATURE,
        messages=[{"role": "user", "content": prompt_text}],
    )
    usage = reply.usage
    summary = {
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
        "latency_s": round(time.time() - started, 2),
    }
    if reply.content:
        summary["response_text"] = reply.content[0].text
    else:
        summary["response_text"] = ""
    summary["stop_reason"] = reply.stop_reason
    return summary
def call_local_metadata(document_text):
    """Ask the local Ollama model for surface metadata about *document_text*.

    Returns ``{"response": <raw model text>, "latency_s": ...}`` on success or
    ``{"error": <message>, "latency_s": ...}`` when anything in the request
    fails. This is deliberately best-effort: the caller records the failure
    and moves on to the next document.
    """
    started = time.time()

    def _elapsed():
        return round(time.time() - started, 2)

    try:
        request_body = {
            "model": LOCAL_MODEL,
            "prompt": LOCAL_METADATA_PROMPT + document_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 1024, "temperature": 0, "num_ctx": 12288},
        }
        reply = requests.post(OLLAMA_URL, json=request_body, timeout=LOCAL_TIMEOUT)
        reply.raise_for_status()
        generated = reply.json().get("response", "")
        return {"response": generated, "latency_s": _elapsed()}
    except Exception as e:
        return {"error": str(e), "latency_s": _elapsed()}
def parse_graph_full(raw):
    """Parse a model reply into ``(entities, edges, parsed_ok)``.

    ``parsed_ok`` is True only when the reply is a JSON object whose
    "entities" and "edges" values are both lists; in every other case the
    result is ``(None, None, False)``.
    """
    failure = (None, None, False)

    cleaned = strip_json_fences(raw)
    if not cleaned:
        return failure

    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        return failure

    if not isinstance(payload, dict):
        return failure

    entity_list = payload.get("entities")
    edge_list = payload.get("edges")
    if not (isinstance(entity_list, list) and isinstance(edge_list, list)):
        return failure
    return entity_list, edge_list, True
def parse_metadata(raw):
    """Parse the local model's metadata reply into a dict.

    Returns the decoded JSON object, or None when the reply is empty, is not
    valid JSON, or decodes to something other than an object. The last case
    matters because the caller serializes the result straight into the
    condition-B prompt: a bare list, number or string is not usable metadata
    and is treated as a parse failure, in line with parse_graph_full.
    """
    cleaned = strip_json_fences(raw)
    if not cleaned:
        return None
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    return data if isinstance(data, dict) else None
def graph_metrics(entities, edges):
    """Compute size, diversity and connectivity metrics for an extracted graph.

    Inputs are the lists returned by parse_graph_full; None for either one
    yields None. Counts (n_entities, n_edges) use the raw list lengths, while
    diversity and connectivity only look at dict items and compare names,
    types and predicates case-insensitively after trimming whitespace.
    """
    if entities is None or edges is None:
        return None

    def _norm(value):
        return str(value).strip().lower()

    entity_total = len(entities)
    edge_total = len(edges)
    entity_dicts = [item for item in entities if isinstance(item, dict)]
    edge_dicts = [item for item in edges if isinstance(item, dict)]

    # Distinct predicate phrases and distinct entity types (empty values skipped).
    distinct_predicates = {
        _norm(rel["predicate"]) for rel in edge_dicts if rel.get("predicate")
    }
    distinct_types = {
        _norm(node["type"]) for node in entity_dicts if node.get("type")
    }

    # Each edge contributes to the degree of two nodes.
    mean_degree = (2 * edge_total / entity_total) if entity_total > 0 else 0.0

    # Undirected adjacency over declared entity names; edges that reference
    # an undeclared name are ignored for connectivity.
    neighbors = {
        _norm(node["name"]): set() for node in entity_dicts if node.get("name")
    }
    for rel in edge_dicts:
        head = _norm(rel.get("subject", ""))
        tail = _norm(rel.get("object", ""))
        if head in neighbors and tail in neighbors:
            neighbors[head].add(tail)
            neighbors[tail].add(head)

    # Stack-based traversal: peel off one connected component at a time and
    # remember the size of the biggest one.
    unseen = set(neighbors)
    biggest = 0
    while unseen:
        frontier = [unseen.pop()]
        size = 0
        while frontier:
            current = frontier.pop()
            size += 1
            for adjacent in neighbors[current]:
                if adjacent in unseen:
                    unseen.remove(adjacent)
                    frontier.append(adjacent)
        biggest = max(biggest, size)

    if entity_total:
        biggest_pct = round(100 * biggest / entity_total, 1)
    else:
        biggest_pct = 0.0

    return {
        "n_entities": entity_total,
        "n_edges": edge_total,
        "predicate_diversity": len(distinct_predicates),
        "type_diversity": len(distinct_types),
        "avg_degree": round(mean_degree, 2),
        "largest_component": biggest,
        "largest_component_pct": biggest_pct,
    }
def stratify(docs, n_small=15, n_medium=25):
    """Build the experiment sample from three size buckets.

    Small (<1000 chars) and medium (1000-4999 chars) documents are taken, in
    file order, from *docs* (the v2 briefing results). The large bucket is
    loaded separately from ~/aaronai/large_bucket_sources.json, because the v2
    results contain no large documents.

    Args:
        docs: v2 document records; each must have a "content_length" key.
        n_small: maximum number of small documents to keep (default 15).
        n_medium: maximum number of medium documents to keep (default 25).

    Returns:
        small + medium + large records, in that order. Large records are
        synthesized with content_length 0; the real length is measured when
        the text is fetched.
    """
    small = [d for d in docs if d["content_length"] < 1000][:n_small]
    medium = [d for d in docs if 1000 <= d["content_length"] < 5000][:n_medium]

    # Load large bucket from external sources file
    large_sources_file = Path.home() / "aaronai" / "large_bucket_sources.json"
    if large_sources_file.exists():
        large_source_names = json.loads(large_sources_file.read_text())
        # Synthesize doc_meta entries for the large sources
        large = [{"source": s, "content_length": 0, "status": "SUCCESS"}
                 for s in large_source_names]
        # Report what was actually selected rather than the nominal targets,
        # so a short bucket is visible in the run log.
        print(f"Stratify: {len(small)} small + {len(medium)} medium from v2, "
              f"{len(large)} large from large_bucket_sources.json")
    else:
        large = []
        print("WARN: large_bucket_sources.json not found, no large docs in sample")
    return small + medium + large
def fmt_metrics(m):
    """Render a graph_metrics dict as a one-line summary ("n/a" for None)."""
    if m is None:
        return "n/a"
    fields = [
        f"e={m['n_entities']}",
        f"edge={m['n_edges']}",
        f"pred={m['predicate_diversity']}",
        f"type={m['type_diversity']}",
        f"deg={m['avg_degree']}",
        f"comp={m['largest_component']}/{m['n_entities']}",
    ]
    return " ".join(fields)
def main():
    """Run the A/B extraction experiment and write the results file.

    For each sampled document: fetch its text from Postgres, run condition A
    (baseline Haiku extraction), run the local metadata pass, and, if that
    yields parseable metadata, run condition B (Haiku extraction with the
    metadata in the prompt). Per-document records are collected in `results`;
    totals and per-bucket averages are computed only over documents where both
    conditions produced parseable graphs. Exits with status 1 when the
    required environment variables or the v2 input file are missing.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    pg_dsn = os.environ.get("PG_DSN")
    if not api_key or not pg_dsn:
        print("ERROR: ANTHROPIC_API_KEY or PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    if not V2_FILE.exists():
        print(f"ERROR: {V2_FILE} not found", file=sys.stderr)
        sys.exit(1)
    with open(V2_FILE) as f:
        v2 = json.load(f)
    # Only documents the v2 briefing run processed successfully are candidates.
    docs_meta = [d for d in v2["documents"] if d.get("status") == "SUCCESS"]
    sample = stratify(docs_meta)
    print(f"Sample: {len(sample)} docs (15s/25m/10l, file order)")
    print(f"Mistral context: 12288 tokens, doc cap {MAX_DOC_CHARS} chars")
    print(f"Haiku model: {HAIKU_MODEL} temp={HAIKU_TEMPERATURE}")
    print(f"Test: base-class metadata as orienting frame, NOT entity drafting")
    print()
    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    for i, doc_meta in enumerate(sample, 1):
        source = doc_meta["source"]
        doc_text, original_len = fetch_document_text(pg_conn, source)
        if not doc_text:
            print(f"[{i:02d}/{len(sample)}] {source[:55]} — SKIP (not in pgvector)")
            results.append({"source": source, "skipped": "not_in_pgvector"})
            continue
        sent_len = len(doc_text)
        truncated = original_len > sent_len
        # The bucket is derived from the length actually sent (after
        # truncation), not from the content_length recorded in the v2 file.
        size_bucket = (
            "small" if sent_len < 1000
            else "medium" if sent_len < 5000
            else "large"
        )
        trunc_marker = "*" if truncated else " "
        print(f"[{i:02d}/{len(sample)}] [{size_bucket:6s}] [{sent_len:>5}c{trunc_marker}] {source[:55]}", flush=True)
        # Condition A
        try:
            a = call_haiku(client, CONDITION_A_PROMPT + doc_text)
            a_ents, a_edges, a_ok = parse_graph_full(a["response_text"])
            a_metrics = graph_metrics(a_ents, a_edges) if a_ok else None
            print(f" A: in={a['input_tokens']} out={a['output_tokens']} "
                  f"stop={a['stop_reason']} t={a['latency_s']}s", flush=True)
            print(f" {fmt_metrics(a_metrics)}", flush=True)
        except Exception as e:
            # A failed API call is recorded and the run continues; `a` then
            # only carries the error, so every later read uses a.get(...).
            print(f" A FAILED: {e}", flush=True)
            a = {"error": str(e)}
            a_metrics = None
        # Condition B local metadata pass
        local_result = call_local_metadata(doc_text)
        if "error" in local_result:
            print(f" B local FAILED: {local_result['error']}", flush=True)
            results.append({
                "source": source,
                "size_bucket": size_bucket,
                "doc_chars_original": original_len,
                "doc_chars_sent": sent_len,
                "truncated": truncated,
                "condition_a": {
                    "input_tokens": a.get("input_tokens"),
                    "output_tokens": a.get("output_tokens"),
                    "latency_s": a.get("latency_s"),
                    "metrics": a_metrics,
                    "stop_reason": a.get("stop_reason"),
                    "response_text": a.get("response_text", "")[:32000],
                    "error": a.get("error"),
                },
                "condition_b": {
                    "skipped": "local_model_failed",
                    "local_error": local_result["error"],
                    "local_latency_s": local_result.get("latency_s"),
                },
            })
            continue
        local_raw = local_result["response"]
        metadata = parse_metadata(local_raw)
        # Override LLM-hallucinated char_length with Python-computed truth
        if metadata is not None and isinstance(metadata, dict):
            metadata["char_length"] = len(doc_text)
        print(f" B local: t={local_result['latency_s']}s metadata_parsed={metadata is not None}",
              flush=True)
        if metadata is None:
            print(f" B: metadata parse failed — skipping API call", flush=True)
            results.append({
                "source": source,
                "size_bucket": size_bucket,
                "doc_chars_original": original_len,
                "doc_chars_sent": sent_len,
                "truncated": truncated,
                "condition_a": {
                    "input_tokens": a.get("input_tokens"),
                    "output_tokens": a.get("output_tokens"),
                    "latency_s": a.get("latency_s"),
                    "metrics": a_metrics,
                    "stop_reason": a.get("stop_reason"),
                    "response_text": a.get("response_text", "")[:32000],
                    "error": a.get("error"),
                },
                "condition_b": {
                    "skipped": "metadata_parse_failed",
                    "local_latency_s": local_result.get("latency_s"),
                    "local_raw": local_raw[:1000],
                },
            })
            continue
        metadata_json = json.dumps(metadata, ensure_ascii=False, indent=2)
        # str.replace rather than str.format: the prompt contains literal
        # JSON braces that format() would try to interpret.
        b_prompt = CONDITION_B_API_PROMPT.replace("{metadata_json}", metadata_json) + doc_text
        try:
            b = call_haiku(client, b_prompt)
            b_ents, b_edges, b_ok = parse_graph_full(b["response_text"])
            b_metrics = graph_metrics(b_ents, b_edges) if b_ok else None
            print(f" B api: in={b['input_tokens']} out={b['output_tokens']} "
                  f"stop={b['stop_reason']} t={b['latency_s']}s", flush=True)
            print(f" {fmt_metrics(b_metrics)}", flush=True)
        except Exception as e:
            print(f" B api FAILED: {e}", flush=True)
            b = {"error": str(e)}
            b_metrics = None
        # Per-doc deltas
        # Printed only when both API calls returned usage numbers.
        if "input_tokens" in a and "input_tokens" in b:
            in_pct = (b["input_tokens"] - a["input_tokens"]) / a["input_tokens"] * 100 if a["input_tokens"] else 0.0
            out_pct = (b["output_tokens"] - a["output_tokens"]) / a["output_tokens"] * 100 if a["output_tokens"] else 0.0
            edge_pct_str = "n/a"
            pred_pct_str = "n/a"
            if a_metrics and b_metrics:
                if a_metrics["n_edges"] > 0:
                    edge_pct_str = f"{(b_metrics['n_edges'] - a_metrics['n_edges']) / a_metrics['n_edges'] * 100:+.1f}%"
                if a_metrics["predicate_diversity"] > 0:
                    pred_pct_str = f"{(b_metrics['predicate_diversity'] - a_metrics['predicate_diversity']) / a_metrics['predicate_diversity'] * 100:+.1f}%"
            print(f" Δ in={in_pct:+.1f}% out={out_pct:+.1f}% edges={edge_pct_str} pred={pred_pct_str}",
                  flush=True)
        results.append({
            "source": source,
            "size_bucket": size_bucket,
            "doc_chars_original": original_len,
            "doc_chars_sent": sent_len,
            "truncated": truncated,
            "condition_a": {
                "input_tokens": a.get("input_tokens"),
                "output_tokens": a.get("output_tokens"),
                "latency_s": a.get("latency_s"),
                "metrics": a_metrics,
                "stop_reason": a.get("stop_reason"),
                "response_text": a.get("response_text", "")[:32000],
                "error": a.get("error"),
            },
            "condition_b": {
                "local_latency_s": local_result.get("latency_s"),
                "local_metadata": metadata,
                "local_raw": local_raw[:1000],
                "api_input_tokens": b.get("input_tokens"),
                "api_output_tokens": b.get("output_tokens"),
                "api_latency_s": b.get("latency_s"),
                "metrics": b_metrics,
                "stop_reason": b.get("stop_reason"),
                "response_text": b.get("response_text", "")[:32000],
                "error": b.get("error"),
            },
        })
    pg_conn.close()
    total_elapsed = round(time.time() - t_total, 1)
    # A "valid pair" is a document where both conditions produced a
    # parseable graph; all totals and averages below use only these.
    valid = [r for r in results
             if r.get("condition_a", {}).get("metrics") is not None
             and r.get("condition_b", {}).get("metrics") is not None]
    a_in = sum(r["condition_a"]["input_tokens"] for r in valid)
    a_out = sum(r["condition_a"]["output_tokens"] for r in valid)
    b_in = sum(r["condition_b"]["api_input_tokens"] for r in valid)
    b_out = sum(r["condition_b"]["api_output_tokens"] for r in valid)
    a_cost = (a_in * HAIKU_IN_PER_M + a_out * HAIKU_OUT_PER_M) / 1_000_000
    b_cost = (b_in * HAIKU_IN_PER_M + b_out * HAIKU_OUT_PER_M) / 1_000_000

    def avg_metric(rows, condition, key):
        """Mean of one metric over *rows* for one condition, or None if empty."""
        vals = [r[condition]["metrics"][key] for r in rows if r[condition]["metrics"]]
        return round(statistics.mean(vals), 2) if vals else None

    by_bucket = {}
    for bucket in ("small", "medium", "large"):
        rows = [r for r in valid if r["size_bucket"] == bucket]
        if not rows:
            # Empty buckets are stored as None and skipped when printing.
            by_bucket[bucket] = None
            continue
        ai = sum(r["condition_a"]["input_tokens"] for r in rows)
        ao = sum(r["condition_a"]["output_tokens"] for r in rows)
        bi = sum(r["condition_b"]["api_input_tokens"] for r in rows)
        bo = sum(r["condition_b"]["api_output_tokens"] for r in rows)
        by_bucket[bucket] = {
            "n": len(rows),
            "input_delta_pct": round((bi - ai) / ai * 100, 2) if ai else None,
            "output_delta_pct": round((bo - ao) / ao * 100, 2) if ao else None,
            "a_avg_entities": avg_metric(rows, "condition_a", "n_entities"),
            "b_avg_entities": avg_metric(rows, "condition_b", "n_entities"),
            "a_avg_edges": avg_metric(rows, "condition_a", "n_edges"),
            "b_avg_edges": avg_metric(rows, "condition_b", "n_edges"),
            "a_avg_predicate_diversity": avg_metric(rows, "condition_a", "predicate_diversity"),
            "b_avg_predicate_diversity": avg_metric(rows, "condition_b", "predicate_diversity"),
            "a_avg_type_diversity": avg_metric(rows, "condition_a", "type_diversity"),
            "b_avg_type_diversity": avg_metric(rows, "condition_b", "type_diversity"),
            "a_avg_degree": avg_metric(rows, "condition_a", "avg_degree"),
            "b_avg_degree": avg_metric(rows, "condition_b", "avg_degree"),
            "a_avg_largest_component_pct": avg_metric(rows, "condition_a", "largest_component_pct"),
            "b_avg_largest_component_pct": avg_metric(rows, "condition_b", "largest_component_pct"),
        }
    summary = {
        "experiment": "base_class_test",
        "title": "Base-Class Enrichment — OOP Framing",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "haiku_model": HAIKU_MODEL,
        "local_model": LOCAL_MODEL,
        "max_doc_chars": MAX_DOC_CHARS,
        "n_documents": len(sample),
        "n_valid_pairs": len(valid),
        "total_elapsed_s": total_elapsed,
        "totals": {
            "a_input_tokens": a_in,
            "a_output_tokens": a_out,
            "b_input_tokens": b_in,
            "b_output_tokens": b_out,
            "a_cost_usd": round(a_cost, 4),
            "b_cost_usd": round(b_cost, 4),
            "cost_delta_usd": round(b_cost - a_cost, 4),
            "cost_delta_pct": round((b_cost - a_cost) / a_cost * 100, 2) if a_cost else None,
            "note": "API cost only — local Mistral runtime on VPS not monetized",
        },
        "by_size_bucket": by_bucket,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w") as f:
        json.dump(summary, f, indent=2)
    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(sample)} valid pairs in {total_elapsed}s")
    print(f"A total cost: ${a_cost:.4f} (in={a_in} out={a_out})")
    print(f"B total cost: ${b_cost:.4f} (in={b_in} out={b_out})")
    delta_pct = summary['totals']['cost_delta_pct']
    if delta_pct is not None:
        verdict = "B cheaper" if delta_pct < 0 else "B more expensive"
        print(f"Cost delta: {delta_pct:+.2f}% ({verdict})")
    print()
    print("By bucket — graph metrics (A vs B):")
    for bucket, stats in by_bucket.items():
        if stats:
            print(f" {bucket:6s} (n={stats['n']}):")
            # NOTE(review): the two delta values can be None when a bucket's
            # condition-A token total is 0, which this format spec would
            # reject — confirm that cannot occur for valid pairs.
            print(f" cost: in {stats['input_delta_pct']:+.1f}% out {stats['output_delta_pct']:+.1f}%")
            print(f" entities: A={stats['a_avg_entities']} B={stats['b_avg_entities']}")
            print(f" edges: A={stats['a_avg_edges']} B={stats['b_avg_edges']}")
            print(f" predicate diversity: A={stats['a_avg_predicate_diversity']} B={stats['b_avg_predicate_diversity']}")
            print(f" type diversity: A={stats['a_avg_type_diversity']} B={stats['b_avg_type_diversity']}")
            print(f" avg degree: A={stats['a_avg_degree']} B={stats['b_avg_degree']}")
            print(f" largest component %: A={stats['a_avg_largest_component_pct']} B={stats['b_avg_largest_component_pct']}")
    print()
    print(f"Results: {OUTPUT_FILE}")


if __name__ == "__main__":
    main()
+376
View File
@@ -0,0 +1,376 @@
#!/usr/bin/env python3
"""
BirdAI Briefing Generator v2 — Experiment 002b
===============================================
Changes from v1 (based on Experiment 004 human evaluation):
- document_type now pre-classified by rule, not by model
- Capture template header stripped before model sees content
- noise_signals constrained to controlled vocabulary
- Model prompt simplified — focuses only on reliable signal fields
- Expanded document type vocabulary for BirdAI-specific types
Results written to ~/aaronai/briefing_test_v2_results.json
"""
import json
import os
import re
import urllib.request
import urllib.error
import psycopg2
import psycopg2.extras
import hashlib
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv
load_dotenv(os.path.expanduser("~/aaronai/.env"))
# Connection string for the Postgres database; get_sample_documents()
# raises if it is missing.
PG_DSN = os.getenv("PG_DSN")
# Results are written here (rewritten repeatedly during the run).
RESULTS_FILE = os.path.expanduser("~/aaronai/briefing_test_v2_results.json")
MODEL = "mistral"
SAMPLE_SIZE = 50  # LIMIT applied to the sampling query
OLLAMA_URL = "http://localhost:11434/api/generate"

# Controlled vocabularies. sanitize_briefing() replaces or drops any model
# output that falls outside these sets.
VALID_DOC_TYPES = {
    "voice_capture", "image_capture",
    "dream_nrem", "dream_rem", "dream_lucid", "dream_synthesis",
    "presentation", "code", "spreadsheet",
    "academic_pdf", "technical_doc", "chat_log",
    "book_excerpt", "form", "syllabus", "email",
    "notes", "purchase_order", "annual_report",
    "invoice", "memo", "report", "unknown"
}
VALID_DENSITIES = {"high", "medium", "low"}
VALID_PRIORITIES = {"full", "partial", "skip"}
VALID_NOISE_SIGNALS = {
    "repeated_headers", "page_numbers", "formatting_artifacts",
    "boilerplate", "watermarks", "footers", "line_numbers",
    "encoding_artifacts", "ocr_errors"
}
VALID_STRUCTURE_SIGNALS = {
    "headings", "bullet_lists", "numbered_lists", "tables",
    "code_blocks", "citations", "footnotes", "images",
    "forms", "columns", "sections"
}
def pre_classify_document(source, content):
    """Classify a document by filename/content rules and strip template headers.

    Returns ``(doc_type, cleaned_content)``. ``doc_type`` is None when no rule
    matches (the model then classifies the document). ``cleaned_content`` is
    the text after the first "---" when the part before it looks like a
    capture/dream template header and the remainder is non-empty; otherwise
    it is *content* unchanged.
    """
    filename = os.path.basename(source).lower()

    # --- Template header stripping ---------------------------------------
    cleaned_content = content
    header, separator, remainder = content.partition("---")
    if separator:
        header_lower = header.lower()
        body = remainder.strip()
        template_markers = ("**type:**", "**modality:**", "# capture", "# dream")
        if body and any(marker in header_lower for marker in template_markers):
            cleaned_content = body

    # --- Filename rules, first match wins --------------------------------
    if "nrem" in filename:
        return "dream_nrem", cleaned_content
    if "lucid" in filename:
        return "dream_lucid", cleaned_content
    if "-rem-" in filename or filename.endswith("-rem.md"):
        return "dream_rem", cleaned_content
    if "synthesis" in filename and filename.endswith(".md"):
        return "dream_synthesis", cleaned_content
    if "-voice" in filename or "voice-" in filename:
        return "voice_capture", cleaned_content
    if "-image" in filename or "image-" in filename:
        return "image_capture", cleaned_content
    if filename.endswith((".pptx", ".ppt")):
        return "presentation", cleaned_content
    if filename.endswith((".xlsx", ".xls", ".csv")):
        return "spreadsheet", cleaned_content
    source_suffixes = (".py", ".js", ".ts", ".cpp", ".c", ".h", ".java", ".rs")
    if (filename.endswith(source_suffixes)
            or filename.endswith("cmakelists.txt")
            or filename == "makefile"):
        return "code", cleaned_content

    # --- Content rules: only reached when the filename was inconclusive --
    if content.startswith("# Dream"):
        opening = content[:50].lower()
        for keyword, label in (("nrem", "dream_nrem"),
                               ("lucid", "dream_lucid"),
                               ("rem", "dream_rem")):
            if keyword in opening:
                return label, cleaned_content
        return "dream_synthesis", cleaned_content
    if content.startswith("# Capture"):
        if "voice" in content[:100].lower():
            return "voice_capture", cleaned_content
        return "image_capture", cleaned_content

    return None, cleaned_content
def build_briefing_prompt(content, pre_classified_type=None, max_chars=1500):
    """Build the briefing prompt sent to the local model.

    Args:
        content: document text (already stripped of any template header).
        pre_classified_type: rule-based document type, if one was found. When
            given, it is embedded as a fixed value the model is told not to
            change; otherwise the model is asked to choose from the listed
            types.
        max_chars: how many leading characters of *content* to include.
            Defaults to 1500, the excerpt size the experiment was run with.

    Returns:
        The complete prompt string, ending with the document excerpt.
    """
    if pre_classified_type:
        type_instruction = f'\n "document_type": "{pre_classified_type}", // pre-classified, do not change'
    else:
        type_instruction = '\n "document_type": "one of: academic_pdf, technical_doc, chat_log, book_excerpt, form, syllabus, email, notes, purchase_order, annual_report, invoice, memo, report, unknown",'
    # "{{" below is an escaped literal brace; the third "{" opens the
    # type_instruction placeholder.
    return f"""Analyze this document and return a JSON briefing. No explanation, no prose, JSON only.
Return exactly this structure:
{{{type_instruction}
"primary_language": "language code e.g. en, fr, de",
"density": "one of: high, medium, low",
"has_proper_nouns": true or false,
"has_dates": true or false,
"has_numeric_data": true or false,
"has_institutional_language": true or false,
"has_technical_terms": true or false,
"likely_has_named_entities": true or false,
"structure_signals": [],
"noise_signals": [],
"extraction_priority": "one of: full, partial, skip"
}}
Rules:
- density: high=information dense technical or academic, medium=mixed, low=narrative/literary/sparse/short
- has_proper_nouns: true if you see capitalized words that are NOT sentence starts or template headers
- has_dates: true if you see date patterns (numbers with months, years, slashes)
- has_numeric_data: true if you see measurements, percentages, statistics
- has_institutional_language: true if you see words like university, department, policy, committee, grant
- has_technical_terms: true if you see domain-specific jargon or acronyms
- likely_has_named_entities: true if has_proper_nouns is true
- structure_signals: use ONLY these terms: headings, bullet_lists, numbered_lists, tables, code_blocks, citations, footnotes, images, forms, columns, sections
- noise_signals: use ONLY these terms: repeated_headers, page_numbers, formatting_artifacts, boilerplate, watermarks, footers, line_numbers, encoding_artifacts, ocr_errors
- extraction_priority: full if density=high and likely_has_named_entities=true; skip if density=low AND likely_has_named_entities=false AND content is under 200 words; partial otherwise
Document:
{content[:max_chars]}"""
def get_sample_documents():
    """Fetch up to SAMPLE_SIZE rows from ``embeddings``, one per distinct source.

    Only rows whose document text is between 101 and 2999 characters are
    considered. Rows come back as dict-like records with the keys id,
    document, source and created_at.

    Raises:
        RuntimeError: when PG_DSN is not configured.
    """
    if not PG_DSN:
        raise RuntimeError("PG_DSN not found in .env — cannot connect to database")
    conn = psycopg2.connect(PG_DSN)
    # try/finally so the cursor and connection are released even when the
    # query fails.
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            # NOTE(review): DISTINCT ON picks a random row *within* each
            # source, but LIMIT is applied after ORDER BY source, so which
            # sources are chosen looks deterministic rather than random —
            # confirm this is the intended sampling.
            cur.execute("""
SELECT DISTINCT ON (source) id, document, source, created_at
FROM embeddings
WHERE length(document) > 100
AND length(document) < 3000
ORDER BY source, random()
LIMIT %s
""", (SAMPLE_SIZE,))
            docs = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()
    return docs
def run_briefing(prompt):
    """Send *prompt* to the local Ollama model and parse its JSON reply.

    Returns ``(briefing_dict, raw_text)`` on success. On any failure returns
    ``(None, message)`` where the message starts with a short tag (NO_JSON,
    NOT_DICT, URL_ERROR, TIMEOUT, JSON_ERROR or ERROR) describing what went
    wrong.
    """
    request_body = json.dumps({"model": MODEL, "prompt": prompt, "stream": False}).encode()
    raw = ""
    try:
        request = urllib.request.Request(
            OLLAMA_URL,
            data=request_body,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(request, timeout=180) as resp:
            envelope = json.loads(resp.read().decode())
        raw = envelope.get("response", "").strip()

        # The model may wrap the JSON in prose; keep only the span from the
        # first "{" to the last "}".
        first_brace = raw.find("{")
        last_brace = raw.rfind("}")
        if first_brace == -1 or last_brace == -1:
            return None, f"NO_JSON: {raw[:200]}"

        candidate = json.loads(raw[first_brace:last_brace + 1])
        if isinstance(candidate, dict):
            return candidate, raw
        return None, f"NOT_DICT: {raw[:100]}"
    except urllib.error.URLError as e:
        return None, f"URL_ERROR: {e}"
    except TimeoutError:
        return None, "TIMEOUT"
    except json.JSONDecodeError as e:
        return None, f"JSON_ERROR: {e} | raw: {raw[:200]}"
    except Exception as e:
        return None, f"ERROR: {type(e).__name__}: {e}"
def sanitize_briefing(briefing, pre_classified_type=None):
    """Coerce a raw model briefing into the controlled schema.

    Enumerated fields fall back to a default when the model's value is outside
    the allowed vocabulary, flag fields are coerced to bool, and signal lists
    keep only recognised terms. A rule-based *pre_classified_type*, when
    given, always overrides the model's document_type.
    """
    def _norm(value):
        return str(value).lower().strip()

    def _choice(key, fallback, allowed):
        candidate = _norm(briefing.get(key, fallback))
        return candidate if candidate in allowed else fallback

    safe = {}
    if pre_classified_type:
        safe["document_type"] = pre_classified_type
    else:
        safe["document_type"] = _choice("document_type", "unknown", VALID_DOC_TYPES)
    safe["primary_language"] = _norm(briefing.get("primary_language", "en"))[:10]
    safe["density"] = _choice("density", "medium", VALID_DENSITIES)

    # Flags: models sometimes answer with strings such as "yes" or "true".
    flag_fields = (
        "has_proper_nouns", "has_dates", "has_numeric_data",
        "has_institutional_language", "has_technical_terms",
        "likely_has_named_entities",
    )
    for flag in flag_fields:
        answer = briefing.get(flag, False)
        if isinstance(answer, str):
            safe[flag] = answer.lower() in ("true", "yes", "1")
        else:
            safe[flag] = bool(answer)

    # Signal lists: accept a list or a single string, drop unknown terms.
    signal_vocabularies = (
        ("structure_signals", VALID_STRUCTURE_SIGNALS),
        ("noise_signals", VALID_NOISE_SIGNALS),
    )
    for key, allowed in signal_vocabularies:
        reported = briefing.get(key, [])
        if isinstance(reported, list):
            candidates = reported
        elif isinstance(reported, str):
            candidates = [reported]
        else:
            candidates = []
        safe[key] = [_norm(term) for term in candidates if _norm(term) in allowed]

    safe["extraction_priority"] = _choice("extraction_priority", "partial", VALID_PRIORITIES)
    return safe
def estimate_token_reduction(original_text, briefing):
    """Estimate how many prompt tokens a briefing would save for one document.

    Tokens are approximated as len(text) / 4 (minimum 1). A fixed 200-token
    orientation saving is assumed, plus 5% of the document per reported noise
    signal (capped at 40%). A "skip" briefing counts as a 100% reduction
    because no API call would be made. The overall percentage is capped at 99.
    """
    approx_tokens = max(len(original_text) / 4, 1)
    fixed_saving = 200

    if briefing.get("extraction_priority") == "skip":
        return {
            "original_tokens_approx": round(approx_tokens),
            "orientation_tokens_saved": round(approx_tokens + 200),
            "noise_reduction_pct": 100.0,
            "total_reduction_pct": 100.0,
            "note": "skip — no API call",
        }

    signal_count = len(briefing.get("noise_signals", []))
    noise_fraction = min(signal_count * 0.05, 0.40)
    noise_saving = approx_tokens * noise_fraction
    combined_saving = fixed_saving + noise_saving
    overall_pct = min((combined_saving / (approx_tokens + 200)) * 100, 99.0)

    return {
        "original_tokens_approx": round(approx_tokens),
        "orientation_tokens_saved": fixed_saving,
        "noise_tokens_saved": round(noise_saving),
        "noise_reduction_pct": round(noise_fraction * 100, 1),
        "total_reduction_pct": round(overall_pct, 1),
    }
def format_eta(elapsed_times, completed, total):
    """Return an "ETA: H:MM:SS" string from the mean time per finished item.

    Before anything has completed there is nothing to average, so a
    placeholder is returned instead.
    """
    if completed == 0:
        return "ETA: --:--"
    mean_seconds = sum(elapsed_times) / completed
    remaining_items = total - completed
    remaining = timedelta(seconds=int(remaining_items * mean_seconds))
    return f"ETA: {remaining}"
def content_hash(text):
    """Return the first 8 hex characters of the MD5 digest of *text*.

    Used as a short content fingerprint in the results file, not for security.
    """
    digest = hashlib.md5()
    digest.update(text.encode())
    return digest.hexdigest()[:8]
def main():
    """Run the briefing experiment over a document sample and save results.

    Each sampled document is pre-classified by rule, briefed by the local
    model, sanitized, and given a token-reduction estimate. Results are
    written to RESULTS_FILE after every document (so an interrupted run keeps
    its progress) and once more at the end with the summary filled in.
    """
    test_start = time.time()
    print(f"\nBirdAI Briefing Generator v2 — Experiment 002b")
    print(f"Model: {MODEL} | Sample: {SAMPLE_SIZE} docs (distinct sources)")
    print(f"Changes: rule-based doc_type, template stripping, controlled vocab")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Results: {RESULTS_FILE}")
    print("-" * 75)
    docs = get_sample_documents()
    print(f"Loaded {len(docs)} distinct source documents from pgvector\n")
    results = {
        "meta": {"model": MODEL, "version": "v2", "sample_size": len(docs),
                 "started": datetime.now().isoformat(), "completed": None,
                 "total_elapsed_seconds": None, "avg_seconds_per_doc": None},
        "documents": [], "summary": {}
    }
    success_count = 0
    failed_count = 0
    pre_classified_count = 0
    priority_counts = {"full": 0, "partial": 0, "skip": 0}
    total_reduction_pct = 0.0
    elapsed_times = []
    for i, doc in enumerate(docs):
        doc_id = doc["id"]
        content = doc["document"]
        source = doc.get("source", "unknown")
        chash = content_hash(content)
        pre_type, cleaned_content = pre_classify_document(source, content)
        was_pre_classified = pre_type is not None
        if was_pre_classified:
            pre_classified_count += 1
        # `i` documents are finished at this point, matching len(elapsed_times).
        eta_str = format_eta(elapsed_times, i, len(docs))
        # R = classified by rule, M = left for the model to classify.
        pre_flag = "R" if was_pre_classified else "M"
        print(f"[{i+1:02d}/{len(docs)}][{pre_flag}] {source[:36]:<36} {eta_str:<14}", end=" ", flush=True)
        prompt = build_briefing_prompt(cleaned_content, pre_type)
        t_start = time.time()
        briefing, raw = run_briefing(prompt)
        elapsed = round(time.time() - t_start, 1)
        elapsed_times.append(elapsed)
        if briefing is None:
            # On failure `raw` carries the tagged error message.
            failed_count += 1
            print(f"→ FAILED {elapsed}s | {raw[:50]}")
            results["documents"].append({
                "id": doc_id, "source": source, "content_hash": chash,
                "content_length": len(content), "status": "FAILED",
                "pre_classified_type": pre_type, "error": raw, "elapsed_seconds": elapsed
            })
        else:
            briefing = sanitize_briefing(briefing, pre_type)
            success_count += 1
            priority = briefing["extraction_priority"]
            doc_type = briefing["document_type"]
            density = briefing["density"]
            priority_counts[priority] = priority_counts.get(priority, 0) + 1
            reduction = estimate_token_reduction(cleaned_content, briefing)
            total_reduction_pct += reduction["total_reduction_pct"]
            print(f"{priority.upper():<7} {doc_type:<15} density:{density:<6} -{reduction['total_reduction_pct']:>5.1f}% {elapsed}s")
            results["documents"].append({
                "id": doc_id, "source": source, "content_hash": chash,
                "content_length": len(content), "cleaned_content_length": len(cleaned_content),
                "status": "SUCCESS", "pre_classified_type": pre_type,
                "was_pre_classified": was_pre_classified, "elapsed_seconds": elapsed,
                "briefing": briefing, "token_reduction_estimate": reduction
            })
        # Checkpoint after every document.
        with open(RESULTS_FILE, "w") as f:
            json.dump(results, f, indent=2, default=str)
    total_elapsed = round(time.time() - test_start, 1)
    avg_per_doc = round(total_elapsed / len(docs), 1) if docs else 0
    completed_at = datetime.now().isoformat()
    results["meta"]["completed"] = completed_at
    results["meta"]["total_elapsed_seconds"] = total_elapsed
    results["meta"]["avg_seconds_per_doc"] = avg_per_doc
    total = len(docs)
    avg_reduction = round(total_reduction_pct / success_count, 1) if success_count else 0
    # An empty sample (no rows matched the query) must not crash the summary
    # with a division by zero; treat it as a 0% success rate.
    success_ratio = success_count / total if total else 0.0
    summary = {
        "total": total, "success": success_count, "failed": failed_count,
        "success_rate": round(success_ratio * 100, 1),
        "pre_classified_by_rule": pre_classified_count,
        "classified_by_model": total - pre_classified_count,
        "extraction_priority_breakdown": priority_counts,
        "avg_token_reduction_pct": avg_reduction,
        "total_elapsed_seconds": total_elapsed, "avg_seconds_per_doc": avg_per_doc,
        "projected_50_doc_minutes": round((avg_per_doc * 50) / 60, 1),
        "approach_viable": success_ratio >= 0.8
    }
    results["summary"] = summary
    with open(RESULTS_FILE, "w") as f:
        json.dump(results, f, indent=2, default=str)
    print("\n" + "=" * 75)
    print(f"RESULTS — Briefing Generator v2")
    print(f" Success rate: {success_count}/{total} ({summary['success_rate']}%)")
    print(f" Failed: {failed_count}")
    print(f" Pre-classified (rule): {pre_classified_count}")
    print(f" Classified (model): {total - pre_classified_count}")
    print(f" Priority — full: {priority_counts.get('full', 0)}")
    print(f" Priority — partial: {priority_counts.get('partial', 0)}")
    print(f" Priority — skip: {priority_counts.get('skip', 0)}")
    print(f" Avg token reduction: {avg_reduction}%")
    print(f" Total elapsed: {total_elapsed}s ({round(total_elapsed/60, 1)} min)")
    print(f" Avg per document: {avg_per_doc}s")
    print(f" Projected 50 docs: {summary['projected_50_doc_minutes']} min")
    print(f" Approach viable: {'YES' if summary['approach_viable'] else 'NO'}")
    print(f" Completed: {completed_at}")
    print(f" Full results: {RESULTS_FILE}")
    print("=" * 75)


if __name__ == "__main__":
    main()
+508
View File
@@ -0,0 +1,508 @@
#!/usr/bin/env python3
"""
Cascade Optimization Test — skip-small + compressed-draft
Tests whether two optimizations on the entity-drafter cascade meaningfully
improve the savings ceiling beyond the prior unoptimized cascade (12.66%).
Optimizations:
1 — Skip-small-docs routing: docs <1000 chars bypass the local pass entirely
2 — Compressed draft format: bare JSON array instead of markdown bullets
Conditions:
A — Baseline: single Claude Haiku call, full extraction (unchanged from prior)
B — Optimized cascade: skip-small + compressed draft, otherwise same cascade
Sample: 30 docs from briefing_test_v2_results.json:
- 10 small (<1000 chars) — should show 0% delta if skip-small works
- 12 medium (1000-5000 chars) — primary test bucket
- 8 large (5000-12000 chars, capped at 12K)
Mistral context: 12K (raised from 8K in prior run).
Outputs: ~/aaronai/experiments/cascade_optimization_results.json
"""
import json
import os
import re
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
import requests
from dotenv import load_dotenv
# Load environment variables from the project's .env file; main() reads
# ANTHROPIC_API_KEY and PG_DSN from the environment (presumably supplied here).
load_dotenv(Path.home() / "aaronai" / ".env")
# Input: prior briefing-test results; main() samples its "documents" entries
# whose status is "SUCCESS".
V2_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# Output: summary JSON written by main() at the end of the run.
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "cascade_optimization_results.json"
# Settings passed to every Haiku call (see call_haiku).
HAIKU_MODEL = "claude-haiku-4-5-20251001"
HAIKU_MAX_TOKENS = 4096
HAIKU_TEMPERATURE = 0.0
# Local Ollama endpoint and model used for the entity-candidate draft (see call_local).
OLLAMA_URL = "http://localhost:11434/api/generate"
LOCAL_MODEL = "mistral"
LOCAL_TIMEOUT = 180  # raised — 12K context can take longer
MAX_DOC_CHARS = 12000  # raised from 8K
# Documents shorter than this many characters skip the local pass in main().
SKIP_SMALL_THRESHOLD = 1000
# Haiku pricing in USD per million input / output tokens, as applied in
# main()'s cost computation.
HAIKU_IN_PER_M = 1.0
HAIKU_OUT_PER_M = 5.0
CONDITION_A_PROMPT = """Extract a knowledge graph from the document below.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits the entity. Do not constrain yourself to a fixed list.
Edge predicates: natural language phrases that capture the actual relationship the document states or implies.
Extract every entity and every relationship the document states or strongly implies. Both subject and object in every edge must appear in entities. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
LOCAL_PROMPT = """List every named entity that appears in the document below — every person, organization, place, project, document, material, technique, date, event, or other named thing.
Return ONLY valid JSON:
{
"candidates": [string]
}
Just names. No types, no relationships. JSON only.
DOCUMENT:
"""
# Compressed draft format — bare JSON array, minimal preamble
CONDITION_B_API_PROMPT_COMPRESSED = """Extract a knowledge graph from the document below.
Local model entity candidates (hint, not authoritative — verify against the document, ignore false ones, add missed ones):
{local_draft_json}
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits. Edge predicates: natural language phrases capturing the actual relationship. Both subject and object in every edge must appear in entities. Extract every entity and every relationship the document states or strongly implies. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
def strip_json_fences(text):
    """Return *text* with a surrounding Markdown code fence removed.

    A leading "```" or "```json" marker and a trailing "```" marker are
    dropped, along with the whitespace around them. Falsy input (``None``
    or an empty string) yields ``""``.
    """
    if not text:
        return ""
    opening_fence = r"^```(?:json)?\s*"
    closing_fence = r"\s*```$"
    without_opening = re.sub(opening_fence, "", text.strip())
    return re.sub(closing_fence, "", without_opening).strip()
def fetch_document_text(pg_conn, source):
    """Reassemble a document from its stored chunks.

    Selects every ``document`` row of the ``embeddings`` table whose
    ``source`` matches, ordered by ``id``, and joins the chunks with a blank
    line between them.

    Args:
        pg_conn: Open DB-API connection (psycopg2 in this script).
        source: Value of the ``source`` column identifying the document.

    Returns:
        ``(text, full_length)`` where ``text`` is the joined document cut to
        ``MAX_DOC_CHARS`` characters and ``full_length`` is its length before
        cutting; ``(None, 0)`` when no rows match.
    """
    cur = pg_conn.cursor()
    try:
        cur.execute(
            "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
            (source,),
        )
        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails, so a bad lookup does
        # not leave an open cursor behind on the shared connection.
        cur.close()
    if not rows:
        return None, 0
    full = "\n\n".join(r[0] for r in rows)
    return full[:MAX_DOC_CHARS], len(full)
def call_haiku(client, prompt_text):
    """Send *prompt_text* to Haiku as a single user message.

    Uses the module-level ``HAIKU_MODEL``, ``HAIKU_MAX_TOKENS`` and
    ``HAIKU_TEMPERATURE`` settings.

    Returns:
        dict with ``input_tokens``, ``output_tokens``, ``latency_s``
        (wall-clock seconds, rounded to 2 places), ``response_text`` (text of
        the first content block, or ``""`` when the response has no content)
        and ``stop_reason``.
    """
    started = time.time()
    request = {
        "model": HAIKU_MODEL,
        "max_tokens": HAIKU_MAX_TOKENS,
        "temperature": HAIKU_TEMPERATURE,
        "messages": [{"role": "user", "content": prompt_text}],
    }
    reply = client.messages.create(**request)
    usage = reply.usage
    return {
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
        "latency_s": round(time.time() - started, 2),
        "response_text": reply.content[0].text if reply.content else "",
        "stop_reason": reply.stop_reason,
    }
def call_local(document_text):
    """Ask the local Ollama model for entity candidates in *document_text*.

    Posts ``LOCAL_PROMPT`` followed by the document to ``OLLAMA_URL`` with
    JSON output requested and streaming disabled.

    Returns:
        ``{"response": <model text>, "latency_s": <seconds>}`` on success, or
        ``{"error": <message>, "latency_s": <seconds>}`` when anything in the
        request or response handling raises. This function never raises.
    """
    started = time.time()
    try:
        payload = {
            "model": LOCAL_MODEL,
            "prompt": LOCAL_PROMPT + document_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 1024, "temperature": 0, "num_ctx": 12288},
        }
        http_resp = requests.post(OLLAMA_URL, json=payload, timeout=LOCAL_TIMEOUT)
        http_resp.raise_for_status()
        body = http_resp.json()
        return {
            "response": body.get("response", ""),
            "latency_s": round(time.time() - started, 2),
        }
    except Exception as e:
        # Best-effort by design: the caller records the failure and moves on.
        return {"error": str(e), "latency_s": round(time.time() - started, 2)}
def parse_graph(raw):
    """Count the entities and edges in a model's knowledge-graph reply.

    Returns:
        ``(entity_count, edge_count)`` when *raw* (after fence stripping) is a
        JSON object whose ``entities`` and ``edges`` values are both lists;
        ``(None, None)`` for empty, unparseable, or differently shaped input.
    """
    unparsed = (None, None)
    payload = strip_json_fences(raw)
    if not payload:
        return unparsed
    try:
        graph = json.loads(payload)
    except json.JSONDecodeError:
        return unparsed
    if not isinstance(graph, dict):
        return unparsed
    entities, relations = graph.get("entities"), graph.get("edges")
    if not (isinstance(entities, list) and isinstance(relations, list)):
        return unparsed
    return len(entities), len(relations)
def parse_candidates(raw):
    """Parse the local model's ``{"candidates": [...]}`` reply into names.

    Returns:
        A list of stripped, non-empty candidate strings (possibly empty), or
        ``None`` when *raw* is empty, is not valid JSON, is not a JSON object,
        or its ``candidates`` value is not a list.
    """
    cleaned = strip_json_fences(raw)
    if not cleaned:
        return None
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        return None
    cands = data.get("candidates")
    if not isinstance(cands, list):
        return None
    # Filter again after stripping: a whitespace-only entry is truthy before
    # stripping and would otherwise survive as an empty candidate name,
    # making an effectively empty draft look non-empty to the caller.
    stripped = (str(c).strip() for c in cands if c)
    return [name for name in stripped if name]
def stratify(docs):
    """Pick 10 small / 12 medium / 8 large by character length, in file order.

    Buckets are decided by each doc's ``content_length``: small is under
    1000, medium is 1000 up to (not including) 5000, large is 5000 and above.
    The result lists the small picks first, then medium, then large.
    """
    small, medium, large = [], [], []
    for doc in docs:
        length = doc["content_length"]
        if length < 1000:
            small.append(doc)
        elif 1000 <= length < 5000:
            medium.append(doc)
        elif length >= 5000:
            large.append(doc)
    return small[:10] + medium[:12] + large[:8]
# Cost delta (%) of the prior, unoptimized cascade run. Stored in the summary
# and used as the reference point for the console "optimization delta" line.
_PRIOR_UNOPTIMIZED_CASCADE_PCT = -12.66


def _run_haiku(client, prompt_text, label):
    """Run one Haiku extraction, log a one-line summary, and never raise.

    Returns ``(result, entity_count, edge_count)``. On any exception the
    result is ``{"error": str(exc)}`` and both counts are ``None`` so the
    caller can still record the document.
    """
    try:
        result = call_haiku(client, prompt_text)
        n_ents, n_edges = parse_graph(result["response_text"])
        print(f" {label}: in={result['input_tokens']} out={result['output_tokens']} "
              f"ents={n_ents} edges={n_edges} stop={result['stop_reason']} t={result['latency_s']}s",
              flush=True)
        return result, n_ents, n_edges
    except Exception as e:
        # Deliberate best-effort: one failed API call must not abort the run.
        print(f" {label} FAILED: {e}", flush=True)
        return {"error": str(e)}, None, None


def _condition_a_record(a, a_ents, a_edges):
    """Build the stored ``condition_a`` entry from a baseline call result."""
    return {
        "input_tokens": a.get("input_tokens"),
        "output_tokens": a.get("output_tokens"),
        "latency_s": a.get("latency_s"),
        "entity_count": a_ents,
        "edge_count": a_edges,
        "stop_reason": a.get("stop_reason"),
        "response_text": a.get("response_text", "")[:4000],
        "error": a.get("error"),
    }


def _print_deltas(a, b, a_edges, b_edges):
    """Log B-vs-A token and edge deltas when both calls reported usage."""
    if "input_tokens" not in a or "input_tokens" not in b:
        return
    in_pct = (b["input_tokens"] - a["input_tokens"]) / a["input_tokens"] * 100 if a["input_tokens"] else 0.0
    out_pct = (b["output_tokens"] - a["output_tokens"]) / a["output_tokens"] * 100 if a["output_tokens"] else 0.0
    edge_pct_str = "n/a"
    # a_edges is a list length, so truthiness already implies it is > 0.
    if a_edges and b_edges is not None:
        edge_pct_str = f"{(b_edges - a_edges) / a_edges * 100:+.1f}%"
    print(f" Δ input={in_pct:+.1f}% output={out_pct:+.1f}% edges={edge_pct_str}", flush=True)


def _process_document(client, pg_conn, doc_meta, index, total):
    """Run conditions A and B for one sampled document and return its record."""
    source = doc_meta["source"]
    doc_text, original_len = fetch_document_text(pg_conn, source)
    if not doc_text:
        print(f"[{index:02d}/{total}] {source[:55]} — SKIP (not in pgvector)")
        return {"source": source, "skipped": "not_in_pgvector"}

    sent_len = len(doc_text)
    truncated = original_len > sent_len
    size_bucket = (
        "small" if sent_len < 1000
        else "medium" if sent_len < 5000
        else "large"
    )
    skip_small_routed = sent_len < SKIP_SMALL_THRESHOLD
    trunc_marker = "*" if truncated else " "
    route_marker = "[skip-small]" if skip_small_routed else "[cascade] "
    print(f"[{index:02d}/{total}] [{size_bucket:6s}] [{sent_len:>5}c{trunc_marker}] "
          f"{route_marker} {source[:50]}", flush=True)

    # Condition A — always runs
    a, a_ents, a_edges = _run_haiku(client, CONDITION_A_PROMPT + doc_text, "A")
    record = {
        "source": source,
        "size_bucket": size_bucket,
        "doc_chars_original": original_len,
        "doc_chars_sent": sent_len,
        "truncated": truncated,
        "skip_small_routed": skip_small_routed,
        "condition_a": _condition_a_record(a, a_ents, a_edges),
    }

    # Condition B
    if skip_small_routed:
        # Skip-small: B = A. Same call, no local pass.
        print(" B: routed to baseline (skip-small)", flush=True)
        b, b_ents, b_edges = a, a_ents, a_edges
        local_result = {"skipped": "skip_small_routed"}
        local_candidates = []
        local_raw = ""
    else:
        local_result = call_local(doc_text)
        if "error" in local_result:
            print(f" B local FAILED: {local_result['error']} — recording skip", flush=True)
            record["condition_b"] = {
                "skipped": "local_model_failed",
                "local_error": local_result["error"],
                "local_latency_s": local_result.get("latency_s"),
            }
            return record
        local_raw = local_result["response"]
        local_candidates = parse_candidates(local_raw) or []
        print(f" B local: t={local_result['latency_s']}s candidates={len(local_candidates)}",
              flush=True)
        if not local_candidates:
            print(" B local: empty draft — skipping API call", flush=True)
            record["condition_b"] = {
                "skipped": "local_draft_empty",
                "local_latency_s": local_result.get("latency_s"),
                "local_raw": local_raw[:1000],
            }
            return record
        # Compressed draft format — bare JSON array
        local_draft_json = json.dumps(local_candidates, ensure_ascii=False)
        b_prompt = CONDITION_B_API_PROMPT_COMPRESSED.replace("{local_draft_json}", local_draft_json) + doc_text
        b, b_ents, b_edges = _run_haiku(client, b_prompt, "B api")

    _print_deltas(a, b, a_edges, b_edges)
    record["condition_b"] = {
        "skip_small_routed": skip_small_routed,
        "local_latency_s": local_result.get("latency_s"),
        "local_candidates": local_candidates,
        "local_raw": local_raw[:1000],
        "api_input_tokens": b.get("input_tokens"),
        "api_output_tokens": b.get("output_tokens"),
        "api_latency_s": b.get("latency_s"),
        "entity_count": b_ents,
        "edge_count": b_edges,
        "stop_reason": b.get("stop_reason"),
        "response_text": b.get("response_text", "")[:4000],
        "error": b.get("error"),
    }
    return record


def _bucket_stats(rows):
    """Aggregate token totals, deltas and average edge counts for one bucket."""
    ai = sum(r["condition_a"]["input_tokens"] for r in rows)
    ao = sum(r["condition_a"]["output_tokens"] for r in rows)
    bi = sum(r["condition_b"]["api_input_tokens"] for r in rows)
    bo = sum(r["condition_b"]["api_output_tokens"] for r in rows)
    ae = [r["condition_a"]["edge_count"] for r in rows if r["condition_a"]["edge_count"] is not None]
    be = [r["condition_b"]["edge_count"] for r in rows if r["condition_b"]["edge_count"] is not None]
    skip_count = sum(1 for r in rows if r.get("skip_small_routed"))
    return {
        "n": len(rows),
        "n_skip_small_routed": skip_count,
        "n_cascade": len(rows) - skip_count,
        "a_input_tokens": ai,
        "a_output_tokens": ao,
        "b_input_tokens": bi,
        "b_output_tokens": bo,
        "input_delta_pct": round((bi - ai) / ai * 100, 2) if ai else None,
        "output_delta_pct": round((bo - ao) / ao * 100, 2) if ao else None,
        "a_avg_edges": round(statistics.mean(ae), 1) if ae else None,
        "b_avg_edges": round(statistics.mean(be), 1) if be else None,
    }


def _fmt_pct(value):
    """Format a signed percentage, or ``n/a`` when the delta is undefined."""
    return "n/a" if value is None else f"{value:+.1f}%"


def main():
    """Run the skip-small + compressed-draft cascade experiment end to end.

    Loads the sample from ``V2_FILE``, runs condition A (baseline Haiku) and
    condition B (optimized cascade) on every sampled document, aggregates
    token and cost totals overall and per size bucket, writes the summary to
    ``OUTPUT_FILE`` and prints a console report.

    Exits with status 1 when ``ANTHROPIC_API_KEY`` or ``PG_DSN`` is unset or
    ``V2_FILE`` does not exist.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    pg_dsn = os.environ.get("PG_DSN")
    if not api_key or not pg_dsn:
        print("ERROR: ANTHROPIC_API_KEY or PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    if not V2_FILE.exists():
        print(f"ERROR: {V2_FILE} not found", file=sys.stderr)
        sys.exit(1)

    with open(V2_FILE, encoding="utf-8") as f:
        v2 = json.load(f)
    docs_meta = [d for d in v2["documents"] if d.get("status") == "SUCCESS"]
    sample = stratify(docs_meta)
    print(f"Sample: {len(sample)} docs (10s/12m/8l, file order)")
    print(f"Skip-small threshold: <{SKIP_SMALL_THRESHOLD} chars")
    print(f"Mistral context: 12288 tokens, doc cap {MAX_DOC_CHARS} chars")
    print(f"Haiku model: {HAIKU_MODEL} temp={HAIKU_TEMPERATURE} max_tokens={HAIKU_MAX_TOKENS}")
    print()

    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    try:
        for i, doc_meta in enumerate(sample, 1):
            results.append(_process_document(client, pg_conn, doc_meta, i, len(sample)))
    finally:
        # Close the connection even if a document raises unexpectedly.
        pg_conn.close()
    total_elapsed = round(time.time() - t_total, 1)

    # A pair is valid only when both conditions reported token usage.
    valid = [r for r in results
             if r.get("condition_a", {}).get("input_tokens") is not None
             and r.get("condition_b", {}).get("api_input_tokens") is not None]
    a_in = sum(r["condition_a"]["input_tokens"] for r in valid)
    a_out = sum(r["condition_a"]["output_tokens"] for r in valid)
    b_in = sum(r["condition_b"]["api_input_tokens"] for r in valid)
    b_out = sum(r["condition_b"]["api_output_tokens"] for r in valid)
    a_cost = (a_in * HAIKU_IN_PER_M + a_out * HAIKU_OUT_PER_M) / 1_000_000
    b_cost = (b_in * HAIKU_IN_PER_M + b_out * HAIKU_OUT_PER_M) / 1_000_000

    by_bucket = {}
    for bucket in ("small", "medium", "large"):
        rows = [r for r in valid if r["size_bucket"] == bucket]
        by_bucket[bucket] = _bucket_stats(rows) if rows else None

    summary = {
        "experiment": "cascade_optimization_test",
        "title": "Cascade Optimization — skip-small + compressed-draft",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "haiku_model": HAIKU_MODEL,
        "haiku_temperature": HAIKU_TEMPERATURE,
        "haiku_max_tokens": HAIKU_MAX_TOKENS,
        "local_model": LOCAL_MODEL,
        "max_doc_chars": MAX_DOC_CHARS,
        "skip_small_threshold": SKIP_SMALL_THRESHOLD,
        "n_documents": len(sample),
        "n_valid_pairs": len(valid),
        "n_skipped": len(sample) - len(valid),
        "total_elapsed_s": total_elapsed,
        "totals": {
            "a_input_tokens": a_in,
            "a_output_tokens": a_out,
            "b_input_tokens": b_in,
            "b_output_tokens": b_out,
            "a_cost_usd": round(a_cost, 4),
            "b_cost_usd": round(b_cost, 4),
            "cost_delta_usd": round(b_cost - a_cost, 4),
            "cost_delta_pct": round((b_cost - a_cost) / a_cost * 100, 2) if a_cost else None,
            "prior_unoptimized_cascade_pct": _PRIOR_UNOPTIMIZED_CASCADE_PCT,
            "note": "API cost only — local Mistral runtime on VPS not monetized",
        },
        "by_size_bucket": by_bucket,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(sample)} valid pairs in {total_elapsed}s")
    print(f"A total cost: ${a_cost:.4f} (in={a_in} out={a_out})")
    print(f"B total cost: ${b_cost:.4f} (in={b_in} out={b_out})")
    delta_pct = summary['totals']['cost_delta_pct']
    if delta_pct is not None:
        verdict = "B cheaper" if delta_pct < 0 else "B more expensive"
        print(f"Cost delta: {delta_pct:+.2f}% ({verdict})")
        opt_delta = delta_pct - _PRIOR_UNOPTIMIZED_CASCADE_PCT
        print(f"Optimization delta vs prior cascade: {opt_delta:+.2f} points "
              f"(prior was {_PRIOR_UNOPTIMIZED_CASCADE_PCT}%)")
    print()
    print("By size bucket:")
    for bucket, stats in by_bucket.items():
        if stats:
            # Deltas are None when a token total is zero; _fmt_pct keeps the
            # report from crashing on that case.
            print(f" {bucket:6s} (n={stats['n']}, skip={stats['n_skip_small_routed']}): "
                  f"in {_fmt_pct(stats['input_delta_pct'])} "
                  f"out {_fmt_pct(stats['output_delta_pct'])} "
                  f"edges A={stats['a_avg_edges']} B={stats['b_avg_edges']}")
    print()
    print("Results: " + str(OUTPUT_FILE))
if __name__ == "__main__":
main()
+485
View File
@@ -0,0 +1,485 @@
#!/usr/bin/env python3
"""
Cascade Test — Nodes-vs-Edges Experiment
Tests whether splitting graph extraction into "local drafts entity candidates,
API verifies + draws edges" reduces total API cost vs single-shot full
extraction, while producing a comparable graph.
Two conditions per document:
A — Baseline: single Claude Haiku call, full extraction
B — Cascade: Mistral lists entity candidates, then Haiku does verify+edges
Both conditions:
- See the full document (parity-respecting)
- Use open entity type vocabulary (no fixed schema)
- Use natural-language predicates (no constrained relations)
- Same target output schema, same temperature
Sample: 20 docs from briefing_test_v2_results.json, stratified by char length.
Reports API cost only. Local Mistral time is recorded but not monetized
(ran on the VPS, no per-token API charge).
Outputs: ~/aaronai/experiments/cascade_test_results.json
"""
import json
import os
import re
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
import requests
from dotenv import load_dotenv
# Load environment variables from the project's .env file; main() reads
# ANTHROPIC_API_KEY and PG_DSN from the environment (presumably supplied here).
load_dotenv(Path.home() / "aaronai" / ".env")
# Input: prior briefing-test results; main() samples its "documents" entries
# whose status is "SUCCESS".
V2_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# Output: summary JSON written by main() at the end of the run.
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "cascade_test_results.json"
# Settings passed to every Haiku call (see call_haiku).
HAIKU_MODEL = "claude-haiku-4-5-20251001"
HAIKU_MAX_TOKENS = 4096
HAIKU_TEMPERATURE = 0.0
# Local Ollama endpoint and model used for the entity-candidate draft (see call_local).
OLLAMA_URL = "http://localhost:11434/api/generate"
LOCAL_MODEL = "mistral"
LOCAL_TIMEOUT = 120  # seconds, passed as the HTTP timeout in call_local
MAX_DOC_CHARS = 8000  # character cap applied in fetch_document_text
# Verified pricing 2026-04-28 against Anthropic docs
# (USD per million input / output tokens, as applied in main()'s cost computation).
HAIKU_IN_PER_M = 1.0
HAIKU_OUT_PER_M = 5.0
CONDITION_A_PROMPT = """Extract a knowledge graph from the document below.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits the entity. Do not constrain yourself to a fixed list.
Edge predicates: natural language phrases that capture the actual relationship the document states or implies.
Extract every entity and every relationship the document states or strongly implies. Both subject and object in every edge must appear in entities. JSON only, no commentary, no markdown fences.
DOCUMENT:
"""
LOCAL_PROMPT = """List every named entity that appears in the document below — every person, organization, place, project, document, material, technique, date, event, or other named thing.
Return ONLY valid JSON:
{
"candidates": [string]
}
Just names. No types, no relationships. JSON only.
DOCUMENT:
"""
CONDITION_B_API_PROMPT_WITH_DRAFT = """Extract a knowledge graph from the document below.
A local model has identified entity candidates that may help orient your reading. Treat the candidates as a hint, not as truth — verify each candidate appears in the document, ignore any that do not, and add any entities the candidates missed.
Return ONLY valid JSON with this exact schema:
{
"entities": [
{"name": string, "type": string}
],
"edges": [
{"subject": string, "predicate": string, "object": string}
]
}
Entity types: use whatever fits. Edge predicates: natural language phrases capturing the actual relationship. Both subject and object in every edge must appear in entities. Extract every entity and every relationship the document states or strongly implies. JSON only, no commentary, no markdown fences.
ENTITY CANDIDATES FROM LOCAL MODEL:
{local_draft}
DOCUMENT:
"""
def strip_json_fences(text):
    """Return *text* with a surrounding Markdown code fence removed.

    A leading "```" or "```json" marker and a trailing "```" marker are
    dropped, along with the whitespace around them. Falsy input (``None``
    or an empty string) yields ``""``.
    """
    if not text:
        return ""
    opening_fence = r"^```(?:json)?\s*"
    closing_fence = r"\s*```$"
    without_opening = re.sub(opening_fence, "", text.strip())
    return re.sub(closing_fence, "", without_opening).strip()
def fetch_document_text(pg_conn, source):
    """Reassemble a document from its stored chunks.

    Selects every ``document`` row of the ``embeddings`` table whose
    ``source`` matches, ordered by ``id``, and joins the chunks with a blank
    line between them.

    Args:
        pg_conn: Open DB-API connection (psycopg2 in this script).
        source: Value of the ``source`` column identifying the document.

    Returns:
        ``(text, full_length)`` where ``text`` is the joined document cut to
        ``MAX_DOC_CHARS`` characters and ``full_length`` is its length before
        cutting; ``(None, 0)`` when no rows match.
    """
    cur = pg_conn.cursor()
    try:
        cur.execute(
            "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
            (source,),
        )
        rows = cur.fetchall()
    finally:
        # Release the cursor even when the query fails, so a bad lookup does
        # not leave an open cursor behind on the shared connection.
        cur.close()
    if not rows:
        return None, 0
    full = "\n\n".join(r[0] for r in rows)
    return full[:MAX_DOC_CHARS], len(full)
def call_haiku(client, prompt_text):
    """Send *prompt_text* to Haiku as a single user message.

    Uses the module-level ``HAIKU_MODEL``, ``HAIKU_MAX_TOKENS`` and
    ``HAIKU_TEMPERATURE`` settings.

    Returns:
        dict with ``input_tokens``, ``output_tokens``, ``latency_s``
        (wall-clock seconds, rounded to 2 places), ``response_text`` (text of
        the first content block, or ``""`` when the response has no content)
        and ``stop_reason``.
    """
    started = time.time()
    request = {
        "model": HAIKU_MODEL,
        "max_tokens": HAIKU_MAX_TOKENS,
        "temperature": HAIKU_TEMPERATURE,
        "messages": [{"role": "user", "content": prompt_text}],
    }
    reply = client.messages.create(**request)
    usage = reply.usage
    return {
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
        "latency_s": round(time.time() - started, 2),
        "response_text": reply.content[0].text if reply.content else "",
        "stop_reason": reply.stop_reason,
    }
def call_local(document_text):
    """Ask the local Ollama model for entity candidates in *document_text*.

    Posts ``LOCAL_PROMPT`` followed by the document to ``OLLAMA_URL`` with
    JSON output requested and streaming disabled.

    Returns:
        ``{"response": <model text>, "latency_s": <seconds>}`` on success, or
        ``{"error": <message>, "latency_s": <seconds>}`` when anything in the
        request or response handling raises. This function never raises.
    """
    started = time.time()
    try:
        payload = {
            "model": LOCAL_MODEL,
            "prompt": LOCAL_PROMPT + document_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 1024, "temperature": 0, "num_ctx": 8192},
        }
        http_resp = requests.post(OLLAMA_URL, json=payload, timeout=LOCAL_TIMEOUT)
        http_resp.raise_for_status()
        body = http_resp.json()
        return {
            "response": body.get("response", ""),
            "latency_s": round(time.time() - started, 2),
        }
    except Exception as e:
        # Best-effort by design: the caller records the failure and moves on.
        return {"error": str(e), "latency_s": round(time.time() - started, 2)}
def parse_graph(raw):
    """Count the entities and edges in a model's knowledge-graph reply.

    Returns:
        ``(entity_count, edge_count)`` when *raw* (after fence stripping) is a
        JSON object whose ``entities`` and ``edges`` values are both lists;
        ``(None, None)`` for empty, unparseable, or differently shaped input.
    """
    unparsed = (None, None)
    payload = strip_json_fences(raw)
    if not payload:
        return unparsed
    try:
        graph = json.loads(payload)
    except json.JSONDecodeError:
        return unparsed
    if not isinstance(graph, dict):
        return unparsed
    entities, relations = graph.get("entities"), graph.get("edges")
    if not (isinstance(entities, list) and isinstance(relations, list)):
        return unparsed
    return len(entities), len(relations)
def parse_candidates(raw):
    """Parse the local model's ``{"candidates": [...]}`` reply into names.

    Returns:
        A list of stripped, non-empty candidate strings (possibly empty), or
        ``None`` when *raw* is empty, is not valid JSON, is not a JSON object,
        or its ``candidates`` value is not a list.
    """
    cleaned = strip_json_fences(raw)
    if not cleaned:
        return None
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        return None
    cands = data.get("candidates")
    if not isinstance(cands, list):
        return None
    # Filter again after stripping: a whitespace-only entry is truthy before
    # stripping and would otherwise survive as an empty candidate name,
    # making an effectively empty draft look non-empty to the caller.
    stripped = (str(c).strip() for c in cands if c)
    return [name for name in stripped if name]
def stratify(docs):
    """Pick 5 small / 10 medium / 5 large by character length, in file order.

    Buckets are decided by each doc's ``content_length``: small is under
    1000, medium is 1000 up to (not including) 5000, large is 5000 and above.
    The result lists the small picks first, then medium, then large.
    """
    small, medium, large = [], [], []
    for doc in docs:
        length = doc["content_length"]
        if length < 1000:
            small.append(doc)
        elif 1000 <= length < 5000:
            medium.append(doc)
        elif length >= 5000:
            large.append(doc)
    return small[:5] + medium[:10] + large[:5]
def _run_haiku(client, prompt_text, label):
    """Run one Haiku extraction, log a one-line summary, and never raise.

    Returns ``(result, entity_count, edge_count)``. On any exception the
    result is ``{"error": str(exc)}`` and both counts are ``None`` so the
    caller can still record the document.
    """
    try:
        result = call_haiku(client, prompt_text)
        n_ents, n_edges = parse_graph(result["response_text"])
        print(f" {label}: in={result['input_tokens']} out={result['output_tokens']} "
              f"ents={n_ents} edges={n_edges} stop={result['stop_reason']} t={result['latency_s']}s",
              flush=True)
        return result, n_ents, n_edges
    except Exception as e:
        # Deliberate best-effort: one failed API call must not abort the run.
        print(f" {label} FAILED: {e}", flush=True)
        return {"error": str(e)}, None, None


def _condition_a_record(a, a_ents, a_edges):
    """Build the stored ``condition_a`` entry from a baseline call result."""
    return {
        "input_tokens": a.get("input_tokens"),
        "output_tokens": a.get("output_tokens"),
        "latency_s": a.get("latency_s"),
        "entity_count": a_ents,
        "edge_count": a_edges,
        "stop_reason": a.get("stop_reason"),
        "response_text": a.get("response_text", "")[:4000],
        "error": a.get("error"),
    }


def _print_deltas(a, b, a_edges, b_edges):
    """Log B-vs-A token and edge deltas when both calls reported usage."""
    if "input_tokens" not in a or "input_tokens" not in b:
        return
    in_pct = (b["input_tokens"] - a["input_tokens"]) / a["input_tokens"] * 100 if a["input_tokens"] else 0.0
    out_pct = (b["output_tokens"] - a["output_tokens"]) / a["output_tokens"] * 100 if a["output_tokens"] else 0.0
    edge_pct_str = "n/a"
    # a_edges is a list length, so truthiness already implies it is > 0.
    if a_edges and b_edges is not None:
        edge_pct_str = f"{(b_edges - a_edges) / a_edges * 100:+.1f}%"
    print(f" Δ input={in_pct:+.1f}% output={out_pct:+.1f}% edges={edge_pct_str}", flush=True)


def _process_document(client, pg_conn, doc_meta, index, total):
    """Run conditions A and B for one sampled document and return its record."""
    source = doc_meta["source"]
    doc_text, original_len = fetch_document_text(pg_conn, source)
    if not doc_text:
        print(f"[{index:02d}/{total}] {source[:60]} — SKIP (not in pgvector)")
        return {"source": source, "skipped": "not_in_pgvector"}

    sent_len = len(doc_text)
    truncated = original_len > sent_len
    size_bucket = (
        "small" if sent_len < 1000
        else "medium" if sent_len < 5000
        else "large"
    )
    trunc_marker = "*" if truncated else " "
    print(f"[{index:02d}/{total}] [{size_bucket:6s}] [{sent_len:>5}c{trunc_marker}] {source[:55]}", flush=True)

    # Condition A
    a, a_ents, a_edges = _run_haiku(client, CONDITION_A_PROMPT + doc_text, "A")
    record = {
        "source": source,
        "size_bucket": size_bucket,
        "doc_chars_original": original_len,
        "doc_chars_sent": sent_len,
        "truncated": truncated,
        "condition_a": _condition_a_record(a, a_ents, a_edges),
    }

    # Condition B local pass
    local_result = call_local(doc_text)
    if "error" in local_result:
        print(f" B local FAILED: {local_result['error']} — skipping doc", flush=True)
        record["condition_b"] = {
            "skipped": "local_model_failed",
            "local_error": local_result["error"],
            "local_latency_s": local_result.get("latency_s"),
        }
        return record
    local_raw = local_result["response"]
    local_candidates = parse_candidates(local_raw) or []
    print(f" B local: t={local_result['latency_s']}s candidates={len(local_candidates)}",
          flush=True)
    if not local_candidates:
        print(" B local: empty draft — skipping API call to avoid asymmetric test", flush=True)
        record["condition_b"] = {
            "skipped": "local_draft_empty",
            "local_latency_s": local_result.get("latency_s"),
            "local_raw": local_raw[:1000],
        }
        return record

    # Condition B API pass: the draft is injected as a markdown bullet list.
    local_draft_str = "\n".join(f"- {c}" for c in local_candidates)
    b_prompt = CONDITION_B_API_PROMPT_WITH_DRAFT.replace("{local_draft}", local_draft_str) + doc_text
    b, b_ents, b_edges = _run_haiku(client, b_prompt, "B api")

    _print_deltas(a, b, a_edges, b_edges)
    record["condition_b"] = {
        "local_latency_s": local_result.get("latency_s"),
        "local_candidates": local_candidates,
        "local_raw": local_raw[:1000],
        "api_input_tokens": b.get("input_tokens"),
        "api_output_tokens": b.get("output_tokens"),
        "api_latency_s": b.get("latency_s"),
        "entity_count": b_ents,
        "edge_count": b_edges,
        "stop_reason": b.get("stop_reason"),
        "response_text": b.get("response_text", "")[:4000],
        "error": b.get("error"),
    }
    return record


def _bucket_stats(rows):
    """Aggregate token totals, deltas and average edge counts for one bucket."""
    ai = sum(r["condition_a"]["input_tokens"] for r in rows)
    ao = sum(r["condition_a"]["output_tokens"] for r in rows)
    bi = sum(r["condition_b"]["api_input_tokens"] for r in rows)
    bo = sum(r["condition_b"]["api_output_tokens"] for r in rows)
    ae = [r["condition_a"]["edge_count"] for r in rows if r["condition_a"]["edge_count"] is not None]
    be = [r["condition_b"]["edge_count"] for r in rows if r["condition_b"]["edge_count"] is not None]
    return {
        "n": len(rows),
        "a_input_tokens": ai,
        "a_output_tokens": ao,
        "b_input_tokens": bi,
        "b_output_tokens": bo,
        "input_delta_pct": round((bi - ai) / ai * 100, 2) if ai else None,
        "output_delta_pct": round((bo - ao) / ao * 100, 2) if ao else None,
        "a_avg_edges": round(statistics.mean(ae), 1) if ae else None,
        "b_avg_edges": round(statistics.mean(be), 1) if be else None,
    }


def _fmt_pct(value):
    """Format a signed percentage, or ``n/a`` when the delta is undefined."""
    return "n/a" if value is None else f"{value:+.1f}%"


def main():
    """Run the nodes-vs-edges cascade experiment end to end.

    Loads the sample from ``V2_FILE``, runs condition A (baseline Haiku) and
    condition B (local draft + Haiku) on every sampled document, aggregates
    token and cost totals overall and per size bucket, writes the summary to
    ``OUTPUT_FILE`` and prints a console report.

    Exits with status 1 when ``ANTHROPIC_API_KEY`` or ``PG_DSN`` is unset or
    ``V2_FILE`` does not exist.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    pg_dsn = os.environ.get("PG_DSN")
    if not api_key or not pg_dsn:
        print("ERROR: ANTHROPIC_API_KEY or PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    if not V2_FILE.exists():
        print(f"ERROR: {V2_FILE} not found", file=sys.stderr)
        sys.exit(1)

    with open(V2_FILE, encoding="utf-8") as f:
        v2 = json.load(f)
    docs_meta = [d for d in v2["documents"] if d.get("status") == "SUCCESS"]
    sample = stratify(docs_meta)
    print(f"Sample: {len(sample)} docs (stratified by char length, file order)")
    for d in sample:
        print(f" [{d['content_length']:>6}c] {d['source'][:60]}")
    print(f"Haiku model: {HAIKU_MODEL} temp={HAIKU_TEMPERATURE} max_tokens={HAIKU_MAX_TOKENS}")
    print(f"Local model: {LOCAL_MODEL}")
    print()

    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    try:
        for i, doc_meta in enumerate(sample, 1):
            results.append(_process_document(client, pg_conn, doc_meta, i, len(sample)))
    finally:
        # Close the connection even if a document raises unexpectedly.
        pg_conn.close()
    total_elapsed = round(time.time() - t_total, 1)

    # A pair is valid only when both conditions reported token usage.
    valid = [r for r in results
             if r.get("condition_a", {}).get("input_tokens") is not None
             and r.get("condition_b", {}).get("api_input_tokens") is not None]
    a_in = sum(r["condition_a"]["input_tokens"] for r in valid)
    a_out = sum(r["condition_a"]["output_tokens"] for r in valid)
    b_in = sum(r["condition_b"]["api_input_tokens"] for r in valid)
    b_out = sum(r["condition_b"]["api_output_tokens"] for r in valid)
    a_cost = (a_in * HAIKU_IN_PER_M + a_out * HAIKU_OUT_PER_M) / 1_000_000
    b_cost = (b_in * HAIKU_IN_PER_M + b_out * HAIKU_OUT_PER_M) / 1_000_000

    by_bucket = {}
    for bucket in ("small", "medium", "large"):
        rows = [r for r in valid if r["size_bucket"] == bucket]
        by_bucket[bucket] = _bucket_stats(rows) if rows else None

    summary = {
        "experiment": "cascade_test",
        "title": "Nodes-vs-Edges Cascade Experiment",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "haiku_model": HAIKU_MODEL,
        "haiku_temperature": HAIKU_TEMPERATURE,
        "haiku_max_tokens": HAIKU_MAX_TOKENS,
        "local_model": LOCAL_MODEL,
        "max_doc_chars": MAX_DOC_CHARS,
        "n_documents": len(sample),
        "n_valid_pairs": len(valid),
        "n_skipped": len(sample) - len(valid),
        "total_elapsed_s": total_elapsed,
        "totals": {
            "a_input_tokens": a_in,
            "a_output_tokens": a_out,
            "b_input_tokens": b_in,
            "b_output_tokens": b_out,
            "a_cost_usd": round(a_cost, 4),
            "b_cost_usd": round(b_cost, 4),
            "cost_delta_usd": round(b_cost - a_cost, 4),
            "cost_delta_pct": round((b_cost - a_cost) / a_cost * 100, 2) if a_cost else None,
            "note": "API cost only — local Mistral runtime on VPS not monetized",
        },
        "by_size_bucket": by_bucket,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(sample)} valid pairs in {total_elapsed}s")
    print(f"A total cost: ${a_cost:.4f} (in={a_in} out={a_out})")
    print(f"B total cost: ${b_cost:.4f} (in={b_in} out={b_out})")
    delta_pct = summary['totals']['cost_delta_pct']
    if delta_pct is not None:
        verdict = "B cheaper" if delta_pct < 0 else "B more expensive"
        print(f"Cost delta: {delta_pct:+.2f}% ({verdict})")
    print()
    print("By size bucket:")
    for bucket, stats in by_bucket.items():
        if stats:
            # Deltas are None when a token total is zero; _fmt_pct keeps the
            # report from crashing on that case.
            print(f" {bucket:6s} (n={stats['n']}): "
                  f"in {_fmt_pct(stats['input_delta_pct'])} "
                  f"out {_fmt_pct(stats['output_delta_pct'])} "
                  f"edges A={stats['a_avg_edges']} B={stats['b_avg_edges']}")
    print()
    print("NOTE: API cost only. Local Mistral runtime is not monetized.")
    print(f"Results: {OUTPUT_FILE}")
if __name__ == "__main__":
main()
+230
View File
@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
Experiment 003 — Entity-Only Consistency Test
Three Mistral passes per document, measure consistency on entity fields only
(people, organizations, locations, dates). Excludes document_type label.
One document per distinct source (chunks grouped by source and concatenated in id order) — fixes Exp 001 chunk-replacement flaw.
Outputs: ~/aaronai/experiments/consistency_test_v2_results.json
"""
import json
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import psycopg2
import requests
from dotenv import load_dotenv
# Load environment variables from the project's .env file; main() reads
# PG_DSN from the environment (presumably supplied here).
load_dotenv(Path.home() / "aaronai" / ".env")
# Results file for this experiment (named as the output in the module docstring).
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "consistency_test_v2_results.json"
# Local Ollama endpoint and model used by call_mistral.
OLLAMA_URL = "http://localhost:11434/api/generate"
MODEL = "mistral"
N_PASSES = 3  # extraction passes run per document in main()
N_DOCS = 50  # LIMIT passed to fetch_distinct_sources
PER_CALL_TIMEOUT = 60  # seconds — fail fast, don't wedge
MAX_DOC_CHARS = 8000  # cap document length sent to Mistral
EXTRACTION_PROMPT = """Extract entities from the document below. Return ONLY valid JSON with this exact schema:
{
"people": [string],
"organizations": [string],
"locations": [string],
"dates": [string]
}
Rules:
- Only include entities you are CERTAIN about. If uncertain, omit.
- No prose, no markdown fences, no commentary. JSON only.
- Empty arrays are valid.
DOCUMENT:
"""
def call_mistral(document_text):
    """Run one entity-extraction pass on the local model.

    Sends ``EXTRACTION_PROMPT`` followed by the first ``MAX_DOC_CHARS``
    characters of *document_text* to ``OLLAMA_URL``.

    Returns:
        On success, a dict with ``response`` (model text), ``latency_s`` and
        ``truncated`` (whether the document exceeded the cap). On failure, a
        dict with ``error`` and ``latency_s``; a request timeout reports
        ``PER_CALL_TIMEOUT`` as the latency. This function never raises for
        request or response errors.
    """
    capped_text = document_text[:MAX_DOC_CHARS]
    started = time.time()
    try:
        payload = {
            "model": MODEL,
            "prompt": EXTRACTION_PROMPT + capped_text,
            "stream": False,
            "format": "json",
            "options": {"num_predict": 512},
        }
        http_resp = requests.post(OLLAMA_URL, json=payload, timeout=PER_CALL_TIMEOUT)
        http_resp.raise_for_status()
        body = http_resp.json()
        return {
            "response": body.get("response", ""),
            "latency_s": round(time.time() - started, 2),
            "truncated": len(document_text) > MAX_DOC_CHARS,
        }
    except requests.exceptions.Timeout:
        return {"error": f"timeout after {PER_CALL_TIMEOUT}s", "latency_s": PER_CALL_TIMEOUT}
    except Exception as e:
        # Best-effort by design: the caller records the failed pass and continues.
        return {"error": str(e), "latency_s": round(time.time() - started, 2)}
def parse_entities(raw_response):
    """Parse a model reply into normalized, comparable entity lists.

    Optional markdown code fences around the JSON are stripped first.

    Returns a dict with the keys ``people``, ``organizations``,
    ``locations`` and ``dates``, each a sorted list of unique, stripped,
    lower-cased strings. Returns ``None`` when the reply is not valid JSON,
    is not a JSON object, or holds a non-list value under one of the keys.
    """
    text = (raw_response or "").strip()
    text = re.sub(r"^```(?:json)?\s*", "", text)
    text = re.sub(r"\s*```$", "", text)
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return None
    # Valid JSON is not necessarily an object: "[]" or "42" would otherwise
    # crash on .get() and abort the whole run.
    if not isinstance(data, dict):
        return None
    out = {}
    for key in ("people", "organizations", "locations", "dates"):
        vals = data.get(key, [])
        if not isinstance(vals, list):
            return None
        normalized = {str(v).strip().lower() for v in vals if v}
        # Whitespace-only values normalize to "" and are not real entities.
        normalized.discard("")
        out[key] = sorted(normalized)
    return out
def entities_match(a, b):
    """Return True when two parsed entity dicts agree on all four fields.

    A ``None`` on either side (a pass that failed to parse) never matches.
    """
    if a is None or b is None:
        return False
    for field in ("people", "organizations", "locations", "dates"):
        if a[field] != b[field]:
            return False
    return True
def fetch_distinct_sources(pg_conn, n):
    """Fetch up to ``n`` sources, each with its chunks joined into one text.

    Chunks of a source are concatenated in ``id`` order. The LIMIT is applied
    in SQL before the length filter, so fewer than ``n`` pairs can be
    returned: documents that are empty or have 50 or fewer non-blank-trimmed
    characters are dropped.

    Returns a list of ``(source, document)`` tuples.
    """
    cursor = pg_conn.cursor()
    cursor.execute("""
SELECT source, string_agg(document, E'\n\n' ORDER BY id) AS doc
FROM embeddings
WHERE source IS NOT NULL
GROUP BY source
ORDER BY MIN(id)
LIMIT %s
""", (n,))
    fetched = cursor.fetchall()
    cursor.close()
    usable = []
    for source, document in fetched:
        # Skip missing and near-empty documents.
        if document and len(document.strip()) > 50:
            usable.append((source, document))
    return usable
def main():
    """Run the consistency experiment and write the JSON summary.

    For every fetched document, N_PASSES extraction calls are made. A
    document is "consistent" when every pass parsed and all passes produced
    identical entity lists for all four fields. Works for any N_PASSES value,
    not only three.

    Exits with status 1 when the PG_DSN environment variable is not set.
    """
    fields = ("people", "organizations", "locations", "dates")

    pg_dsn = os.environ.get("PG_DSN")
    if not pg_dsn:
        print("ERROR: PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    pg_conn = psycopg2.connect(pg_dsn)
    try:
        docs = fetch_distinct_sources(pg_conn, N_DOCS)
    finally:
        # Release the connection even when the query fails.
        pg_conn.close()
    print(f"Loaded {len(docs)} distinct sources from pgvector")
    print(f"Model: {MODEL} | Passes per doc: {N_PASSES}")
    print(f"Per-call timeout: {PER_CALL_TIMEOUT}s | Max doc chars: {MAX_DOC_CHARS}")
    print(f"Calls planned: {len(docs) * N_PASSES}\n")

    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    for i, (source, doc_text) in enumerate(docs, 1):
        size_marker = f"[{len(doc_text):>5}c]"
        print(f"[{i:02d}/{len(docs)}] {size_marker} {source[:55]}", flush=True)
        passes = []
        for p in range(N_PASSES):
            r = call_mistral(doc_text)
            if "error" in r:
                print(f" pass {p+1}: {r['error']}", flush=True)
                passes.append({"error": r["error"], "parsed_ok": False, "latency_s": r["latency_s"]})
                continue
            entities = parse_entities(r["response"])
            passes.append({
                "raw": r["response"][:500],
                "entities": entities,
                "latency_s": r["latency_s"],
                "parsed_ok": entities is not None,
                "truncated_input": r.get("truncated", False),
            })

        # An empty pass list (N_PASSES == 0) is not a successful parse.
        all_parsed = bool(passes) and all(p.get("parsed_ok") for p in passes)
        if all_parsed:
            # Compare every pass against the first; equality is transitive,
            # so this is the same as the pairwise chain for any pass count.
            first, *rest = (p["entities"] for p in passes)
            consistent = all(entities_match(first, other) for other in rest)
            per_field = {
                k: all(other[k] == first[k] for other in rest)
                for k in fields
            }
        else:
            consistent = False
            per_field = None
        latencies = [p.get("latency_s", 0) for p in passes]
        print(f" parsed={all_parsed} consistent={consistent} latencies={latencies}", flush=True)
        results.append({
            "source": source,
            "doc_chars": len(doc_text),
            "passes": passes,
            "all_parsed": all_parsed,
            "consistent": consistent,
            "per_field_consistency": per_field,
        })
    total_elapsed = round(time.time() - t_total, 1)

    parsed = [r for r in results if r["all_parsed"]]
    consistent = [r for r in parsed if r["consistent"]]
    # Count, per field, the documents whose passes all agreed on that field.
    field_rates = {k: 0 for k in fields}
    for r in parsed:
        for k, v in (r["per_field_consistency"] or {}).items():
            if v:
                field_rates[k] += 1
    field_rates_pct = {
        k: round(100 * v / len(parsed), 1) if parsed else 0.0
        for k, v in field_rates.items()
    }
    summary = {
        "experiment": "003",
        "title": "Entity-Only Consistency Test",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "model": MODEL,
        "n_passes": N_PASSES,
        "per_call_timeout_s": PER_CALL_TIMEOUT,
        "max_doc_chars": MAX_DOC_CHARS,
        "n_documents": len(docs),
        "n_all_parsed": len(parsed),
        "n_fully_consistent": len(consistent),
        "consistency_rate_pct": round(100 * len(consistent) / len(docs), 2) if docs else 0.0,
        "consistency_rate_among_parsed_pct": (
            round(100 * len(consistent) / len(parsed), 2) if parsed else 0.0
        ),
        "per_field_consistency_pct": field_rates_pct,
        "total_elapsed_s": total_elapsed,
        "exp_001_baseline_pct": 18.0,
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w") as f:
        json.dump(summary, f, indent=2)

    print()
    print("=" * 60)
    print(f"DONE — {len(docs)} docs in {total_elapsed}s")
    print(f"All {N_PASSES} passes parsed cleanly: {len(parsed)}/{len(docs)}")
    print(f"Fully consistent (all 4 fields match): {len(consistent)}/{len(docs)} ({summary['consistency_rate_pct']}%)")
    print(f"Among parsed only: {summary['consistency_rate_among_parsed_pct']}%")
    print(f"Per-field consistency: {field_rates_pct}")
    print(f"Exp 001 baseline: 18% | delta: {summary['consistency_rate_pct'] - 18.0:+.2f} pts")
    print(f"Results: {OUTPUT_FILE}")
if __name__ == "__main__":
main()
+551
View File
@@ -0,0 +1,551 @@
"""
Consolidator 0.1 — alias resolution agent for BirdAI's Tier 1 substrate.
Reads entities from FalkorDB group_id 'aaron', infers light type labels,
computes pairwise similarity within type blocks using ego summary embedding +
name string distance + neighbor pattern overlap, generates merge proposals
above threshold, writes proposal log for human review.
Does NOT execute merges. 0.1 is the calibration phase — proposals only,
human reviews before any action.
"""
import json
import re
import os
import time
from datetime import datetime, timezone
from collections import defaultdict
from pathlib import Path
import requests
from falkordb import FalkorDB
import numpy as np
# Configuration
# Name of the FalkorDB graph that generate_proposals() selects.
GROUP_ID = "aaron"
HIGH_CONFIDENCE_THRESHOLD = 0.85 # confidence >= this becomes a merge proposal
LOW_CONFIDENCE_THRESHOLD = 0.65 # confidence >= this (but under HIGH) is logged as low-confidence; anything lower is dropped
# Directory that receives the proposals-<timestamp>.md / .json files.
PROPOSALS_DIR = Path("/home/aaron/Nextcloud/Journal/Consolidation")
# NOTE: runs at import time, so importing this module creates the directory.
PROPOSALS_DIR.mkdir(parents=True, exist_ok=True)
def cosine_similarity(a, b):
    """Return the cosine of the angle between two vectors as a Python float.

    Both inputs are converted to float32 arrays. If either vector has zero
    length the result is 0.0 rather than a division error.
    """
    vec_a, vec_b = (np.array(v, dtype=np.float32) for v in (a, b))
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    if any(magnitude == 0 for magnitude in (norm_a, norm_b)):
        return 0.0
    cosine = np.dot(vec_a, vec_b) / (norm_a * norm_b)
    return float(cosine)
def name_similarity(name_a, name_b):
    """Score how alike two entity names are, from 0.0 to 1.0.

    Checks run in this order and the first that applies wins:

    1. Equal after lower-casing and stripping -> 1.0.
    2. Either name has no word tokens -> 0.0.
    3. One lowered name is a substring of the other -> 0.7 to 0.9, scaled
       by the length ratio ("Aaron" / "Aaron Nelson").
    4. One name's letters appear in the other's word initials -> 0.85
       ("HVAMC" / "Hudson Valley Additive Manufacturing Center").
    5. Otherwise the Jaccard overlap of the word-token sets.
    """
    def _initials_match(candidate, expansion):
        # The candidate acronym must be strictly shorter than the full form,
        # and the full form needs at least two words to have initials.
        if len(candidate) >= len(expansion):
            return False
        words = expansion.split()
        if len(words) < 2:
            return False
        initials = ''.join(word[0].upper() for word in words)
        # Substring test: also accepts a run of consecutive initials.
        return candidate.upper() in initials

    left = name_a.lower().strip()
    right = name_b.lower().strip()
    if left == right:
        return 1.0

    left_tokens = set(re.findall(r'\b\w+\b', left))
    right_tokens = set(re.findall(r'\b\w+\b', right))
    if not (left_tokens and right_tokens):
        return 0.0

    if left in right or right in left:
        # Strong signal, but capped below 1.0 because the lengths differ.
        shorter, longer = sorted((len(left), len(right)))
        return 0.7 + 0.2 * (shorter / longer)

    # The acronym test uses the raw names, not the lowered/stripped ones.
    if _initials_match(name_a, name_b) or _initials_match(name_b, name_a):
        return 0.85

    return len(left_tokens & right_tokens) / len(left_tokens | right_tokens)
def infer_type(entity_name, summary):
    """Guess a coarse entity type for blocking comparisons.

    Returns one of "person", "organization", "project", "place" or
    "unknown". Rules are tried in that order and the first hit wins, so a
    summary containing a person phrase such as "works at" yields "person"
    even when organization phrases are also present. ``summary`` may be None.

    This is a transparent heuristic, not a classifier: its only job is to
    avoid obviously wrong cross-type comparisons.
    """
    name_lower = entity_name.lower().strip()
    summary_lower = (summary or "").lower()

    def contains_any(haystack, needles):
        return any(needle in haystack for needle in needles)

    # Person: exactly two title-cased words, or a person phrase in the summary.
    two_word_name = re.match(r'^[A-Z][a-z]+ [A-Z][a-z]+$', entity_name.strip())
    person_phrases = (
        'is a person', 'is a professor', 'is an artist', 'is a colleague',
        'is a friend', 'is a family member', 'works at', 'studied at',
        "'s spouse", "'s child", "'s parent", "'s student",
    )
    if bool(two_word_name) or contains_any(summary_lower, person_phrases):
        return "person"

    # Organization: institutional word in the name, or a phrase in the summary.
    org_name_parts = (
        ' inc', ' llc', ' corp', ' company', ' university', ' college',
        ' school', ' institute', ' foundation', ' department',
    )
    org_phrases = (
        'is a company', 'is a university', 'is an organization',
        'is an institution', 'is a department', 'is a nonprofit',
    )
    if contains_any(name_lower, org_name_parts) or contains_any(summary_lower, org_phrases):
        return "organization"

    # Project: software / creative work.
    project_phrases = (
        'is a project', 'software project', 'is a codebase',
        'is a tool', 'is a system', 'is an application',
        'is a research project', 'is a design project',
    )
    project_name_parts = (' project', ' system', ' platform')
    if contains_any(summary_lower, project_phrases) or contains_any(name_lower, project_name_parts):
        return "project"

    # Place: only recognised from the summary.
    place_phrases = (
        'is a city', 'is a town', 'is a state', 'is a country',
        'is a neighborhood', 'is a region', 'is a location',
    )
    if contains_any(summary_lower, place_phrases):
        return "place"

    return "unknown"
def get_neighbors(graph, entity_uuid, limit=20):
    """Return the set of names of entities one RELATES_TO hop away.

    At most ``limit`` distinct names are requested from the graph; rows with
    an empty or missing name are ignored.
    """
    cypher = """
MATCH (e:Entity {uuid: $uuid})-[r:RELATES_TO]-(other:Entity)
RETURN DISTINCT other.name AS name
LIMIT $limit
"""
    rows = graph.query(cypher, {"uuid": entity_uuid, "limit": limit}).result_set
    names = set()
    for row in rows:
        neighbor_name = row[0]
        if neighbor_name:
            names.add(neighbor_name)
    return names
def neighbor_jaccard(neighbors_a, neighbors_b):
    """Return the containment overlap of two neighbor sets.

    Despite the name this is not Jaccard. It computes
    |A ∩ B| / min(|A|, |B|): the share of the smaller set that also appears
    in the larger one. A small alias entity whose neighbors are a subset of
    a hub entity's neighbors therefore scores 1.0, where true Jaccard would
    penalize the size difference.

    Returns 0.0 when either set is empty.
    """
    if not (neighbors_a or neighbors_b):
        return 0.0
    smaller_size = min(len(neighbors_a), len(neighbors_b))
    if smaller_size == 0:
        return 0.0
    shared = neighbors_a & neighbors_b
    return len(shared) / smaller_size
def get_edge_count(graph, entity_uuid):
    """Return how many RELATES_TO edges touch the entity (0 if no row)."""
    cypher = """
MATCH (e:Entity {uuid: $uuid})-[r:RELATES_TO]-()
RETURN count(r) AS c
"""
    rows = graph.query(cypher, {"uuid": entity_uuid}).result_set
    if not rows:
        return 0
    return rows[0][0]
def combine_signals(name_sim, ego_sim, neighbor_sim):
    """Blend the three similarity signals into one confidence score.

    Four regimes are tested in order; the first that applies decides:

    1. Strong neighbor containment (>= 0.7) with some ego support (>= 0.3):
       neighbor and ego weighted 0.4 each, name 0.2. This is the asymmetric
       alias case where the names differ but the smaller entity's neighbors
       sit inside the hub's.
    2. Weak ego (< 0.3) and weak neighbor overlap (< 0.4): the larger of the
       two, capped at 0.4.
    3. Near-identical name (>= 0.85) with moderate ego (>= 0.5): ego and
       name weighted 0.4 each, neighbor 0.2.
    4. Otherwise: 0.45 ego, 0.3 neighbor, 0.25 name.
    """
    contained_in_hub = neighbor_sim >= 0.7 and ego_sim >= 0.3
    if contained_in_hub:
        return 0.4 * neighbor_sim + 0.4 * ego_sim + 0.2 * name_sim

    little_evidence = ego_sim < 0.3 and neighbor_sim < 0.4
    if little_evidence:
        return min(0.4, max(ego_sim, neighbor_sim))

    same_name_similar_content = name_sim >= 0.85 and ego_sim >= 0.5
    if same_name_similar_content:
        return 0.4 * ego_sim + 0.4 * name_sim + 0.2 * neighbor_sim

    return 0.45 * ego_sim + 0.3 * neighbor_sim + 0.25 * name_sim
def compute_summary_embedding(text, model="nomic-embed-text"):
    """Embed a summary text through the local Ollama embeddings endpoint.

    Only the first 2000 characters are sent. Returns the value of the
    response's "embedding" field, or ``None`` when the text is missing or
    shorter than 10 characters, or when the request fails for any reason
    (the error is printed, not raised).
    """
    too_short = not text or len(text) < 10
    if too_short:
        return None
    try:
        reply = requests.post(
            "http://localhost:11434/api/embeddings",
            json={"model": model, "prompt": text[:2000]},
            timeout=30,
        )
        reply.raise_for_status()
        return reply.json().get("embedding")
    except Exception as e:
        # Best effort: a failed embedding must not abort the whole run.
        print(f" Embedding error: {e}")
        return None
def precompute_summary_embeddings(entities, model="nomic-embed-text"):
    """Attach a ``summary_embedding`` to every entity dict, using a disk cache.

    Each entity must have ``uuid`` and ``summary`` keys. Embeddings already
    in the JSON cache file are reused; the rest are computed with
    compute_summary_embedding() and added to the cache. Entities whose
    embedding could not be computed get ``summary_embedding = None``.

    The cache is flushed after every 100 newly computed embeddings and once
    more at the end. Returns None; entities are updated in place.
    """
    print(f"Computing summary embeddings via Ollama ({model})...")
    print(f" Total entities: {len(entities)}")
    cache_path = Path("/home/aaron/aaronai/experiments/summary_embeddings_cache.json")
    # NOTE(review): the cache is keyed by uuid only, so a changed summary or
    # a different `model` still reuses the old vector — confirm intended.
    cache = {}
    if cache_path.exists():
        with open(cache_path) as f:
            cache = json.load(f)
        print(f" Loaded {len(cache)} cached embeddings")

    def save_cache():
        # Write to a sibling temp file and rename over the real one, so an
        # interrupted run cannot leave a half-written, unparseable cache.
        tmp_path = cache_path.with_name(cache_path.name + ".tmp")
        with open(tmp_path, "w") as f:
            json.dump(cache, f)
        tmp_path.replace(cache_path)

    new_count = 0
    start = time.time()
    for i, e in enumerate(entities):
        if e["uuid"] in cache:
            e["summary_embedding"] = cache[e["uuid"]]
            continue
        emb = compute_summary_embedding(e["summary"], model=model)
        if not emb:
            e["summary_embedding"] = None
            continue
        e["summary_embedding"] = emb
        cache[e["uuid"]] = emb
        new_count += 1
        # Flush only right after the counter reaches a multiple of 100.
        # Checking on every iteration would rewrite the whole cache for each
        # failed embedding while the counter sits on such a multiple.
        if new_count % 100 == 0:
            save_cache()
            elapsed = time.time() - start
            rate = new_count / elapsed if elapsed > 0 else 0
            remaining = (len(entities) - i - 1) / rate if rate > 0 else 0
            print(f" ... {i+1}/{len(entities)} (computed {new_count} new, ~{remaining:.0f}s remaining)")
    # Final save
    save_cache()
    have_embeddings = sum(1 for e in entities if e.get("summary_embedding"))
    print(f" Done. {have_embeddings}/{len(entities)} entities have summary embeddings")
def generate_proposals():
    """Compare entities pairwise and collect alias-merge candidates.

    Steps:
      1. Load every Entity that has both a name embedding and a summary.
      2. Compute (or load cached) summary embeddings.
      3. Infer a coarse type per entity and group entities by it.
      4. Compare pairs within each typed block, within the 'unknown' block,
         and between each 'unknown' entity and every typed entity.
      5. Score each surviving pair with combine_signals().

    Returns ``(proposals, low_confidence, n_entities, n_pairs)``. A pair goes
    to ``proposals`` at confidence >= HIGH_CONFIDENCE_THRESHOLD and to
    ``low_confidence`` at >= LOW_CONFIDENCE_THRESHOLD; lower scores are
    dropped. Nothing is merged here.
    """
    from itertools import combinations  # local import keeps the block self-contained

    db = FalkorDB(host='localhost', port=6379)
    graph = db.select_graph(GROUP_ID)
    # Pull all entities with embeddings
    print(f"Fetching entities from group_id '{GROUP_ID}'...")
    result = graph.query("""
MATCH (n:Entity)
WHERE n.name_embedding IS NOT NULL AND n.summary IS NOT NULL
RETURN n.uuid, n.name, n.summary, n.name_embedding
""")
    entities = [
        {'uuid': row[0], 'name': row[1], 'summary': row[2], 'embedding': row[3]}
        for row in result.result_set
    ]
    print(f" Loaded {len(entities)} entities with embeddings")

    # Compute summary embeddings (true ego signal, beyond name embeddings)
    precompute_summary_embeddings(entities)

    # Infer types and group by them for blocking
    print("Inferring entity types for blocking...")
    type_counts = defaultdict(int)
    blocks = defaultdict(list)
    for e in entities:
        e['inferred_type'] = infer_type(e['name'], e['summary'])
        type_counts[e['inferred_type']] += 1
        blocks[e['inferred_type']].append(e)
    for t, c in sorted(type_counts.items(), key=lambda x: -x[1]):
        print(f" {t}: {c}")

    print()
    print("Comparing entities within type blocks...")
    proposals = []
    low_confidence = []

    # 'unknown' entities might be any type, so they are compared against
    # everything; typed entities only meet their own block plus unknowns.
    typed_blocks = {t: ents for t, ents in blocks.items() if t != 'unknown'}
    unknown_block = blocks.get('unknown', [])
    pairs_to_compare = []
    for ents in typed_blocks.values():
        pairs_to_compare.extend(combinations(ents, 2))
    pairs_to_compare.extend(combinations(unknown_block, 2))
    for ent_unknown in unknown_block:
        for ents in typed_blocks.values():
            pairs_to_compare.extend((ent_unknown, ent_typed) for ent_typed in ents)
    print(f" Pairs to compare: {len(pairs_to_compare):,}")

    # One graph round-trip per entity, however many pairs it appears in.
    cache_neighbors = {}
    cache_edge_counts = {}

    def neighbors_cached(uuid):
        if uuid not in cache_neighbors:
            cache_neighbors[uuid] = get_neighbors(graph, uuid)
        return cache_neighbors[uuid]

    def edge_count_cached(uuid):
        if uuid not in cache_edge_counts:
            cache_edge_counts[uuid] = get_edge_count(graph, uuid)
        return cache_edge_counts[uuid]

    def describe(ent):
        return {
            'uuid': ent['uuid'],
            'name': ent['name'],
            'type': ent['inferred_type'],
            'summary': ent['summary'][:200],
            'edge_count': edge_count_cached(ent['uuid']),
        }

    for comparisons_done, (ent_a, ent_b) in enumerate(pairs_to_compare, start=1):
        if comparisons_done % 5000 == 0:
            print(f" ... {comparisons_done:,} / {len(pairs_to_compare):,}")

        # Name similarity handles formal/informal pairs and acronyms.
        name_sim = name_similarity(ent_a['name'], ent_b['name'])

        # Ego similarity prefers SUMMARY embeddings (the semantic content)
        # and falls back to name embeddings when a summary one is missing.
        if ent_a.get('summary_embedding') and ent_b.get('summary_embedding'):
            ego_sim_quick = cosine_similarity(ent_a['summary_embedding'], ent_b['summary_embedding'])
        else:
            ego_sim_quick = cosine_similarity(ent_a['embedding'], ent_b['embedding'])

        # Pre-filter: skip the neighbor queries for obviously different
        # pairs. Thresholds are deliberately low so weak name/ego signals can
        # still be rescued by the containment metric.
        if ego_sim_quick < 0.3 and name_sim < 0.15:
            continue

        neighbors_a = neighbors_cached(ent_a['uuid'])
        neighbors_b = neighbors_cached(ent_b['uuid'])
        neighbor_sim = neighbor_jaccard(neighbors_a, neighbors_b)
        confidence = combine_signals(name_sim, ego_sim_quick, neighbor_sim)

        # Pairs under the low threshold are never reported, so do not spend
        # edge-count queries building a record for them.
        if confidence < LOW_CONFIDENCE_THRESHOLD:
            continue

        record = {
            'entity_a': describe(ent_a),
            'entity_b': describe(ent_b),
            'confidence': round(confidence, 3),
            'signals': {
                'name_similarity': round(name_sim, 3),
                'ego_similarity': round(ego_sim_quick, 3),
                'neighbor_overlap': round(neighbor_sim, 3),
            },
            'shared_neighbors': sorted(neighbors_a & neighbors_b)[:10],
        }
        if confidence >= HIGH_CONFIDENCE_THRESHOLD:
            proposals.append(record)
        else:
            low_confidence.append(record)

    print(f"\nDone. Proposals: {len(proposals)}, Low-confidence: {len(low_confidence)}")
    return proposals, low_confidence, len(entities), len(pairs_to_compare)
def write_proposals_log(proposals, low_confidence, total_entities, total_comparisons):
    """Write the review log as Markdown plus a raw JSON copy.

    Both files go into PROPOSALS_DIR and are named after the current UTC
    time to the minute, so a second run within the same minute overwrites
    the first. Proposals and low-confidence candidates are sorted by
    descending confidence; only the first 30 low-confidence candidates are
    listed in the Markdown, while the JSON contains all of them.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M")
    out_path = PROPOSALS_DIR / f"proposals-{timestamp}.md"
    proposals_sorted = sorted(proposals, key=lambda p: -p['confidence'])
    low_sorted = sorted(low_confidence, key=lambda p: -p['confidence'])

    lines = [
        f"# Consolidator 0.1 — Run {timestamp}",
        "",
        "## Statistics",
        f"- Entities scanned: {total_entities:,}",
        f"- Pairwise comparisons: {total_comparisons:,}",
        f"- High-confidence proposals (≥{HIGH_CONFIDENCE_THRESHOLD}): {len(proposals)}",
        f"- Low-confidence candidates ({LOW_CONFIDENCE_THRESHOLD}-{HIGH_CONFIDENCE_THRESHOLD}): {len(low_confidence)}",
        "",
        "## How to review",
        "",
        "For each proposal, mark your decision by changing `[ ]` to one of:",
        "- `[APPROVE]` — execute this merge on next run",
        "- `[REJECT]` — don't merge, don't propose again",
        "- `[DEFER]` — re-surface in next run for further consideration",
        "",
        "Save the file when done. Do not modify proposal_id or uuid fields.",
        "",
        "---",
        "",
        f"## Proposed Merges (n={len(proposals)})",
        "",
    ]

    for i, p in enumerate(proposals_sorted, start=1):
        lines.extend([
            f"### Proposal {i}",
            "",
            "**Decision:** [ ]",
            "",
            f"**Confidence:** {p['confidence']}",
            "",
        ])
        # Entity A and Entity B are rendered identically.
        for label, ent in (("A", p['entity_a']), ("B", p['entity_b'])):
            lines.extend([
                f"**Entity {label}:** \"{ent['name']}\" (type: {ent['type']}, {ent['edge_count']} edges)",
                f" - uuid: `{ent['uuid']}`",
                f" - summary: {ent['summary']}",
                "",
            ])
        signals = p['signals']
        lines.extend([
            "**Signals:**",
            f" - Name similarity: {signals['name_similarity']}",
            f" - Ego (summary) similarity: {signals['ego_similarity']}",
            f" - Neighbor overlap: {signals['neighbor_overlap']}",
        ])
        if p['shared_neighbors']:
            shared_str = ', '.join(f'"{n}"' for n in p['shared_neighbors'][:8])
            lines.append(f" - Shared neighbors (sample): {shared_str}")
        lines.extend([
            "",
            "**Optional rejection note:** ",
            "",
            "---",
            "",
        ])

    lines.append("")
    lines.append(f"## Low-Confidence Candidates (n={len(low_confidence)}, informational only, no action)")
    lines.append("")
    for p in low_sorted[:30]:
        lines.append(f"- **{p['confidence']}** \"{p['entity_a']['name']}\" + \"{p['entity_b']['name']}\" (name={p['signals']['name_similarity']}, ego={p['signals']['ego_similarity']}, nbr={p['signals']['neighbor_overlap']})")
    if len(low_sorted) > 30:
        lines.append(f"- *(...{len(low_sorted) - 30} more not shown)*")

    # The log contains non-ASCII characters ("—", "≥"), so fix the encoding
    # instead of depending on the locale default.
    out_path.write_text("\n".join(lines), encoding="utf-8")
    print(f"\nProposal log written to: {out_path}")

    # Also save raw JSON for downstream tooling
    json_path = PROPOSALS_DIR / f"proposals-{timestamp}.json"
    with open(json_path, 'w', encoding="utf-8") as f:
        json.dump({
            'run_timestamp': timestamp,
            'statistics': {
                'total_entities': total_entities,
                'total_comparisons': total_comparisons,
                'proposal_count': len(proposals),
                'low_confidence_count': len(low_confidence),
            },
            'proposals': proposals_sorted,
            'low_confidence': low_sorted,
        }, f, indent=2)
    print(f"Raw JSON: {json_path}")
def main():
    """Run one calibration pass: generate proposals, then write the logs."""
    banner = "=" * 70
    print(banner)
    print("Consolidator 0.1 — Calibration Phase")
    print(banner)
    print()
    run_output = generate_proposals()
    # run_output is (proposals, low_confidence, total_entities, total_comparisons).
    write_proposals_log(*run_output)
    print()
    # NOTE(review): no function visible in this script reads decisions back
    # or executes merges, although the message below says a re-run will.
    print("Next: review the proposals markdown file and mark APPROVE/REJECT/DEFER")
    print("for each proposal. Re-run will read decisions and execute approved merges.")
if __name__ == "__main__":
main()
+179
View File
@@ -0,0 +1,179 @@
"""
Measure actual Graphiti BULK episode cost on a stratified sample.
Uses /episodes/bulk endpoint. Submits in small batches to avoid rate limits.
"""
import json, os, random, time
from pathlib import Path
import psycopg2, requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
# Base URL of the Graphiti service; batches are posted to <base>/episodes/bulk.
GRAPHITI_URL = "http://localhost:8001"
# Postgres connection string. Raises KeyError at import time if PG_DSN is unset.
PG_DSN = os.environ["PG_DSN"]
# NOTE(review): not referenced anywhere in this script. The sample size
# actually comes from the per-bucket quotas in fetch_stratified_sample()
# (15 short + 25 medium + 10 long).
SAMPLE_SIZE = 50
# Number of episodes per bulk request.
BATCH_SIZE = 5
# Seed for the stratified sample.
RANDOM_SEED = 42
# Where the JSON summary is written.
OUT = Path.home() / "aaronai" / "experiments" / "graphiti_bulk_cost_test.json"
# NOTE: runs at import time, so importing this module creates the directory.
OUT.parent.mkdir(parents=True, exist_ok=True)
def fetch_stratified_sample():
    """Draw a reproducible, length-stratified sample of source documents.

    Every source's chunks are joined in ``id`` order. Sources are bucketed by
    joined length (short < 1000, medium < 5000, long >= 5000 characters) and
    up to 15 / 25 / 10 are drawn from the respective buckets.

    Returns a list of ``(source, document)`` tuples.
    """
    conn = psycopg2.connect(PG_DSN)
    try:
        cur = conn.cursor()
        try:
            # ORDER BY makes the pool order deterministic. Without it the
            # GROUP BY row order is unspecified, so the same seed could pick
            # different documents from run to run.
            cur.execute("""
SELECT source, STRING_AGG(document, E'\\n\\n' ORDER BY id) AS full_doc
FROM embeddings
GROUP BY source
ORDER BY source
""")
            sources = [(s, doc) for s, doc in cur.fetchall() if doc]
        finally:
            cur.close()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # A private generator gives the same draws as seeding the global one,
    # without changing global random state.
    rng = random.Random(RANDOM_SEED)
    short = [(s, d) for s, d in sources if len(d) < 1000]
    medium = [(s, d) for s, d in sources if 1000 <= len(d) < 5000]
    long_ = [(s, d) for s, d in sources if len(d) >= 5000]
    print(f"Pool: short={len(short)} medium={len(medium)} long={len(long_)}")
    sample = (
        rng.sample(short, min(15, len(short))) +
        rng.sample(medium, min(25, len(medium))) +
        rng.sample(long_, min(10, len(long_)))
    )
    print(f"Sample: {len(sample)} sources, batch_size={BATCH_SIZE}")
    return sample
def submit_bulk_batch(batch):
    """POST one batch of ``(source, document)`` pairs to /episodes/bulk.

    Each document is cut to 12000 characters. Never raises for request
    failures. Returns a result dict whose ``error`` is None on an OK
    response, the first 500 characters of the response body on a non-OK
    response, or the exception text when the request itself failed.
    """
    episodes = []
    for source, doc in batch:
        episodes.append({
            "name": source,
            "content": doc[:12000],
            "source_description": "pgvector_migration_bulk_test",
            "timestamp": "2026-04-28T00:00:00",
        })
    batch_sources = [source for source, _ in batch]
    started = time.time()
    try:
        reply = requests.post(
            f"{GRAPHITI_URL}/episodes/bulk",
            json={"episodes": episodes},
            timeout=900,
        )
        elapsed = time.time() - started
        succeeded = reply.ok
        return {
            "batch_size": len(batch),
            "status_code": reply.status_code,
            "elapsed_s": round(elapsed, 2),
            "elapsed_per_episode_s": round(elapsed / len(batch), 2),
            "response": reply.json() if succeeded else None,
            "error": None if succeeded else reply.text[:500],
            "sources": batch_sources,
        }
    except Exception as exc:
        return {
            "batch_size": len(batch),
            "status_code": None,
            "elapsed_s": round(time.time() - started, 2),
            "elapsed_per_episode_s": None,
            "response": None,
            "error": str(exc)[:500],
            "sources": batch_sources,
        }
def main():
    """Submit the stratified sample in batches and report timing.

    Interactive: waits for Enter so the operator can note the current API
    spend first. Writes a JSON summary to OUT and prints a runtime estimate
    for migrating the whole corpus.
    """
    print("=" * 60)
    print("Graphiti BULK Migration Cost Test (Haiku 4.5)")
    print("=" * 60)
    print()
    print("BEFORE running:")
    print(" 1. Open https://console.anthropic.com/settings/usage")
    print(" 2. Note current spend.")
    print()
    input("Press Enter when noted... ")
    print()
    sample = fetch_stratified_sample()
    if not sample:
        print("ERROR: empty sample")
        return
    batches = [sample[i:i+BATCH_SIZE] for i in range(0, len(sample), BATCH_SIZE)]
    print(f"Submitting {len(batches)} batches of up to {BATCH_SIZE} episodes")
    print()

    results = []
    total_start = time.time()
    for i, batch in enumerate(batches, start=1):
        avg_chars = int(sum(len(d) for _, d in batch) / len(batch))
        print(f"[batch {i:2d}/{len(batches)}] n={len(batch)} avg_chars={avg_chars:6d}",
              end=" ", flush=True)
        result = submit_bulk_batch(batch)
        results.append(result)
        error = result["error"]
        # `is not None`, not truthiness: a non-OK response with an empty body
        # has error == "" and must still be reported as a failure.
        if error is not None:
            print(f" ERROR: {error[:80]}")
            # Check the HTTP status too; the body text alone may not mention
            # the rate limit.
            rate_limited = (
                result["status_code"] == 429
                or "429" in error
                or "rate" in error.lower()
            )
            if rate_limited:
                print(" Rate limited - pausing 30s before next batch")
                time.sleep(30)
        else:
            print(f" {result['status_code']} {result['elapsed_s']}s "
                  f"({result['elapsed_per_episode_s']}s/episode)")
    total_elapsed = time.time() - total_start

    successful_batches = [r for r in results if r["error"] is None]
    failed_batches = [r for r in results if r["error"] is not None]
    successful_episodes = sum(r["batch_size"] for r in successful_batches)
    failed_episodes = sum(r["batch_size"] for r in failed_batches)
    summary = {
        "sample_size": len(sample),
        "batch_size": BATCH_SIZE,
        "n_batches": len(batches),
        "successful_batches": len(successful_batches),
        "failed_batches": len(failed_batches),
        "successful_episodes": successful_episodes,
        "failed_episodes": failed_episodes,
        "total_elapsed_s": round(total_elapsed, 1),
        "mean_elapsed_per_episode_s": round(
            sum(r["elapsed_s"] for r in successful_batches) / successful_episodes, 2
        ) if successful_episodes else None,
        "results": results,
    }

    conn = psycopg2.connect(PG_DSN)
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT COUNT(DISTINCT source) FROM embeddings")
            total_sources = cur.fetchone()[0]
        finally:
            cur.close()
    finally:
        conn.close()
    summary["total_corpus_sources"] = total_sources
    if summary["mean_elapsed_per_episode_s"]:
        summary["estimated_migration_hours"] = round(
            total_sources * summary["mean_elapsed_per_episode_s"] / 3600, 1
        )
    OUT.write_text(json.dumps(summary, indent=2))

    print()
    print("=" * 60)
    print("RESULTS")
    print("=" * 60)
    print(f"Episodes: {summary['successful_episodes']}/{summary['sample_size']} succeeded")
    print(f"Batches: {summary['successful_batches']}/{summary['n_batches']} succeeded")
    print(f"Total elapsed: {summary['total_elapsed_s']}s")
    if summary["mean_elapsed_per_episode_s"]:
        print(f"Mean per episode: {summary['mean_elapsed_per_episode_s']}s")
    print(f"Total corpus sources: {summary['total_corpus_sources']}")
    # The estimate only exists when at least one batch succeeded.
    if "estimated_migration_hours" in summary:
        print(f"Estimated migration runtime: {summary['estimated_migration_hours']} hours")
    print()
    print("AFTER:")
    print(" Wait 5 min; note new Anthropic spend; subtract from $28.61 baseline.")
    print(f" delta / {summary['successful_episodes']} = per-episode cost")
    print(f" per-episode * {summary['total_corpus_sources']} = full migration estimate")
    print()
    print(f"Full results: {OUT}")
if __name__ == "__main__":
main()
+122
View File
@@ -0,0 +1,122 @@
"""
Retest just the previously-failed batches after raising MAX_QUEUED_QUERIES.
Reads failed sources from graphiti_bulk_cost_test.json and resubmits.
"""
import json, os, time
from pathlib import Path
import psycopg2, requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
# Base URL of the Graphiti service; batches are posted to <base>/episodes/bulk.
GRAPHITI_URL = "http://localhost:8001"
# Postgres connection string. Raises KeyError at import time if PG_DSN is unset.
PG_DSN = os.environ["PG_DSN"]
# Number of episodes per bulk request.
BATCH_SIZE = 5
# Results file of the original cost test; its failed batches are retried here.
PRIOR_RESULTS = Path.home() / "aaronai" / "experiments" / "graphiti_bulk_cost_test.json"
# Where this retry's JSON summary is written.
OUT = Path.home() / "aaronai" / "experiments" / "graphiti_bulk_retry.json"
def fetch_doc_for_source(cur, source):
    """Return all chunks of ``source`` joined in ``id`` order.

    Returns None when the cursor yields no row; otherwise the first column
    of the row, which may itself be None.
    """
    sql = """
SELECT STRING_AGG(document, E'\\n\\n' ORDER BY id)
FROM embeddings WHERE source = %s
"""
    cur.execute(sql, (source,))
    record = cur.fetchone()
    if not record:
        return None
    return record[0]
def submit_bulk_batch(batch):
    """POST one retry batch of ``(source, document)`` pairs to /episodes/bulk.

    Each document is cut to 12000 characters. Never raises for request
    failures. Returns a result dict whose ``error`` is None on an OK
    response, the first 500 characters of the response body on a non-OK
    response, or the exception text when the request itself failed.
    """
    payload = {"episodes": [
        {"name": s, "content": d[:12000],
         "source_description": "pgvector_migration_bulk_retry",
         "timestamp": "2026-04-28T00:00:00"}
        for s, d in batch
    ]}
    t0 = time.time()
    try:
        r = requests.post(f"{GRAPHITI_URL}/episodes/bulk", json=payload, timeout=900)
        # Read the clock once so both timing fields describe the same
        # interval, as the original cost-test script does.
        elapsed = time.time() - t0
        return {
            "batch_size": len(batch),
            "status_code": r.status_code,
            "elapsed_s": round(elapsed, 2),
            "elapsed_per_episode_s": round(elapsed / len(batch), 2),
            "error": None if r.ok else r.text[:500],
            "sources": [s for s, _ in batch],
        }
    except Exception as e:
        return {
            "batch_size": len(batch),
            "status_code": None,
            "elapsed_s": round(time.time() - t0, 2),
            "elapsed_per_episode_s": None,
            "error": str(e)[:500],
            "sources": [s for s, _ in batch],
        }
def main():
    """Resubmit every source from a failed batch of the prior run.

    Reads PRIOR_RESULTS, reloads each failed source's document from
    Postgres (sources with no document are warned about and skipped),
    resubmits them in batches of BATCH_SIZE, and writes a summary to OUT.
    """
    prior = json.loads(PRIOR_RESULTS.read_text())
    failed_sources = [
        source
        for batch_result in prior["results"]
        if batch_result["error"] is not None
        for source in batch_result["sources"]
    ]
    print(f"Retrying {len(failed_sources)} previously-failed sources")

    conn = psycopg2.connect(PG_DSN)
    cur = conn.cursor()
    sources_with_docs = []
    for s in failed_sources:
        doc = fetch_doc_for_source(cur, s)
        if not doc:
            print(f" WARN: could not find doc for source {s}")
            continue
        sources_with_docs.append((s, doc))
    cur.close()
    conn.close()
    print(f"Loaded {len(sources_with_docs)} source docs")
    print()

    batches = []
    for offset in range(0, len(sources_with_docs), BATCH_SIZE):
        batches.append(sources_with_docs[offset:offset + BATCH_SIZE])

    results = []
    total_start = time.time()
    for i, batch in enumerate(batches, start=1):
        avg = int(sum(len(d) for _, d in batch) / len(batch))
        print(f"[batch {i:2d}/{len(batches)}] n={len(batch)} avg_chars={avg:6d}",
              end=" ", flush=True)
        outcome = submit_bulk_batch(batch)
        results.append(outcome)
        if outcome["error"]:
            print(f" ERROR: {outcome['error'][:80]}")
        else:
            print(f" {outcome['status_code']} {outcome['elapsed_s']}s")
    total_elapsed = time.time() - total_start

    # Split the batch results by outcome.
    successful = []
    failed = []
    for outcome in results:
        if outcome["error"] is None:
            successful.append(outcome)
        else:
            failed.append(outcome)
    summary = {
        "n_retry_sources": len(sources_with_docs),
        "n_batches": len(batches),
        "successful_batches": len(successful),
        "failed_batches": len(failed),
        "successful_episodes": sum(r["batch_size"] for r in successful),
        "failed_episodes": sum(r["batch_size"] for r in failed),
        "total_elapsed_s": round(total_elapsed, 1),
        "results": results,
    }
    OUT.write_text(json.dumps(summary, indent=2))

    rule = "=" * 60
    print()
    print(rule)
    print("RETRY RESULTS")
    print(rule)
    print(f"Episodes: {summary['successful_episodes']}/{len(sources_with_docs)} succeeded")
    print(f"Batches: {summary['successful_batches']}/{summary['n_batches']} succeeded")
    print(f"Total elapsed: {summary['total_elapsed_s']}s")
    print()
    print(f"Full results: {OUT}")
if __name__ == "__main__":
main()
+93
View File
@@ -0,0 +1,93 @@
"""Retry attempt #2 — for sources that timed out after MAX_QUEUED_QUERIES bump."""
import json, os, time
from pathlib import Path
import psycopg2, requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
GRAPHITI_URL = "http://localhost:8001"
PG_DSN = os.environ["PG_DSN"]
BATCH_SIZE = 3 # smaller batches given timeouts
PRIOR = Path.home() / "aaronai" / "experiments" / "graphiti_bulk_retry.json"
OUT = Path.home() / "aaronai" / "experiments" / "graphiti_bulk_retry2.json"
def fetch_doc(cur, source):
    """Return every stored chunk of *source* joined into a single string.

    Executes one aggregate query on ``cur``: the ``document`` values of the
    ``embeddings`` rows matching ``source`` are concatenated in ``id`` order
    with a blank line between them.

    Returns the first column of the fetched row, or ``None`` when the cursor
    yields no row at all.
    """
    query = "SELECT STRING_AGG(document, E'\\n\\n' ORDER BY id) FROM embeddings WHERE source = %s"
    cur.execute(query, (source,))
    fetched = cur.fetchone()
    if not fetched:
        return None
    return fetched[0]
def submit_batch(batch):
    """POST one batch of ``(source, document)`` pairs to ``/episodes/bulk``.

    Each document is truncated to its first 12000 characters before sending.
    Any exception raised while posting or reading the response is captured
    rather than propagated.

    Returns a dict with ``batch_size``, ``status_code`` (``None`` if the
    request raised), ``elapsed_s``, ``error`` (``None`` on an OK response,
    otherwise up to 500 characters of the response body or exception text)
    and ``sources`` (the source names in the batch).
    """
    episodes = []
    for name, text in batch:
        episodes.append({
            "name": name,
            "content": text[:12000],
            "source_description": "pgvector_migration_bulk_retry2",
            "timestamp": "2026-04-28T00:00:00",
        })
    payload = {"episodes": episodes}

    started = time.time()
    try:
        resp = requests.post(f"{GRAPHITI_URL}/episodes/bulk", json=payload, timeout=900)
        status = resp.status_code
        elapsed = round(time.time() - started, 2)
        error = None if resp.ok else resp.text[:500]
    except Exception as exc:
        status = None
        elapsed = round(time.time() - started, 2)
        error = str(exc)[:500]

    return {
        "batch_size": len(batch),
        "status_code": status,
        "elapsed_s": elapsed,
        "error": error,
        "sources": [name for name, _ in batch],
    }
def main():
    """Re-submit the sources of every failed batch from the previous retry run.

    Reads PRIOR, collects the ``sources`` of each result whose ``error`` is
    not ``None``, loads each source's document through ``fetch_doc``, submits
    them in batches of BATCH_SIZE via ``submit_batch`` and writes a summary
    (including the per-batch results) to OUT.

    Sources for which no document is found are reported and listed under
    ``skipped_sources`` in the summary instead of being dropped silently.
    """
    prior = json.loads(PRIOR.read_text())
    failed = []
    for r in prior["results"]:
        if r["error"] is not None:
            failed.extend(r["sources"])
    print(f"Retry #2: {len(failed)} sources still failing")

    sources = []
    skipped = []
    conn = psycopg2.connect(PG_DSN)
    try:
        cur = conn.cursor()
        try:
            for s in failed:
                d = fetch_doc(cur, s)
                if d:
                    sources.append((s, d))
                else:
                    skipped.append(s)
                    print(f" WARN: could not find doc for source {s}")
        finally:
            cur.close()
    finally:
        # Release the connection even when a fetch raises.
        conn.close()

    batches = [sources[i:i+BATCH_SIZE] for i in range(0, len(sources), BATCH_SIZE)]
    print(f"Submitting {len(batches)} batches of up to {BATCH_SIZE}\n")
    results = []
    for i, batch in enumerate(batches, 1):
        # Batches are never empty here, so the division is safe.
        avg = int(sum(len(d) for _, d in batch) / len(batch))
        print(f"[batch {i}/{len(batches)}] n={len(batch)} avg_chars={avg:6d}", end=" ", flush=True)
        r = submit_batch(batch)
        results.append(r)
        if r["error"]:
            print(f" ERROR: {r['error'][:80]}")
        else:
            print(f" {r['status_code']} {r['elapsed_s']}s")

    succ = [r for r in results if r["error"] is None]
    fail = [r for r in results if r["error"] is not None]
    summary = {
        "n_sources": len(sources),
        "successful_batches": len(succ),
        "failed_batches": len(fail),
        "successful_episodes": sum(r["batch_size"] for r in succ),
        "failed_episodes": sum(r["batch_size"] for r in fail),
        "skipped_sources": skipped,
        "results": results,
    }
    OUT.write_text(json.dumps(summary, indent=2))
    print()
    print(f"Episodes: {summary['successful_episodes']}/{len(sources)} succeeded")
    print(f"Full results: {OUT}")


if __name__ == "__main__":
    main()
+175
View File
@@ -0,0 +1,175 @@
"""
Measure actual Graphiti episode-add cost on a stratified sample of pgvector sources.
"""
import json, os, random, time
from pathlib import Path
import psycopg2, requests
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
GRAPHITI_URL = "http://localhost:8001"
PG_DSN = os.environ["PG_DSN"]
SAMPLE_SIZE = 50
RANDOM_SEED = 42
OUT = Path.home() / "aaronai" / "experiments" / "graphiti_cost_test.json"
OUT.parent.mkdir(parents=True, exist_ok=True)
def fetch_stratified_sample():
    """Draw a seeded, length-stratified sample of ``(source, document)`` pairs.

    Loads every source with its chunks concatenated in ``id`` order, drops
    sources whose aggregated document is empty, and splits the rest by
    character count into short (<1000), medium (1000-4999) and long (>=5000).
    Up to 15 short, 25 medium and 10 long sources are sampled, in that order.

    The pool is sorted by source name before sampling: the query has no
    ordering of its own, so without the sort RANDOM_SEED would not pin down
    which sources are picked from one run to the next.

    Returns the sample as a list of ``(source, document)`` tuples.
    """
    query = """
SELECT source, STRING_AGG(document, E'\\n\\n' ORDER BY id) AS full_doc
FROM embeddings
GROUP BY source
"""
    conn = psycopg2.connect(PG_DSN)
    try:
        cur = conn.cursor()
        try:
            cur.execute(query)
            sources = [(s, doc) for s, doc in cur.fetchall() if doc]
        finally:
            cur.close()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # Deterministic pool order so the seeded draw is reproducible.
    sources.sort(key=lambda pair: str(pair[0]))

    random.seed(RANDOM_SEED)
    short = [(s, d) for s, d in sources if len(d) < 1000]
    medium = [(s, d) for s, d in sources if 1000 <= len(d) < 5000]
    long_ = [(s, d) for s, d in sources if len(d) >= 5000]
    print(f"Pool: short={len(short)} medium={len(medium)} long={len(long_)}")
    sample = (
        random.sample(short, min(15, len(short))) +
        random.sample(medium, min(25, len(medium))) +
        random.sample(long_, min(10, len(long_)))
    )
    print(f"Sample: {len(sample)} sources")
    return sample
def submit_episode(source: str, document: str) -> dict:
    """POST a single document to ``/episodes`` and time the call.

    Only the first 12000 characters of ``document`` are sent. Exceptions
    raised while posting or reading the response are captured in the result
    rather than propagated.

    Returns a dict with ``source``, ``doc_chars`` (full length),
    ``doc_chars_sent``, ``status_code`` (``None`` if the request raised),
    ``elapsed_s`` and ``error`` (``None`` on an OK response, otherwise up to
    500 characters of the response body or exception text).
    """
    payload = {
        "name": source,
        "content": document[:12000],
        "source_description": "pgvector_migration_cost_test",
        "timestamp": "2026-04-28T00:00:00",
    }
    outcome = {
        "source": source,
        "doc_chars": len(document),
        "doc_chars_sent": min(len(document), 12000),
    }

    started = time.time()
    try:
        resp = requests.post(f"{GRAPHITI_URL}/episodes", json=payload, timeout=600)
        status = resp.status_code
        elapsed = round(time.time() - started, 2)
        error = None if resp.ok else resp.text[:500]
    except Exception as exc:
        status = None
        elapsed = round(time.time() - started, 2)
        error = str(exc)[:500]

    outcome["status_code"] = status
    outcome["elapsed_s"] = elapsed
    outcome["error"] = error
    return outcome
def main():
    """Run the cost test: smoke-test one source, then submit the rest and summarize.

    Prompts the operator to note current API spend, draws the sample via
    ``fetch_stratified_sample``, submits the first source as a smoke test
    (halting and writing only that result to OUT if it fails), then submits
    the remaining sources one by one. Writes a summary with per-bucket
    timings and an estimated full-migration runtime to OUT and prints it.
    """
    print("=" * 60)
    print("Graphiti Migration Cost Test (Haiku 4.5)")
    print("=" * 60)
    print()
    print("BEFORE running:")
    print(" 1. Open https://console.anthropic.com/settings/usage")
    print(" 2. Note current spend.")
    print()
    # Blocks until the operator confirms; the spend figure is compared by hand afterwards.
    input("Press Enter when noted... ")
    print()
    sample = fetch_stratified_sample()
    if not sample:
        print("ERROR: empty sample"); return
    # Smoke test
    print(f"Smoke test on first source ({sample[0][0][:50]}...):")
    smoke = submit_episode(*sample[0])
    print(f" status={smoke['status_code']} elapsed={smoke['elapsed_s']}s")
    if smoke["error"]:
        print(f" ERROR: {smoke['error']}")
        OUT.write_text(json.dumps({"smoke_test": smoke}, indent=2))
        print("Halted — fix smoke test before bulk run.")
        return
    print(f" OK. Proceeding with {len(sample)} sources.")
    print()
    # The smoke-test result counts as the first episode of the run.
    results = [smoke]
    # Started after the smoke test, so total_elapsed excludes the smoke-test call.
    total_start = time.time()
    for i, (source, doc) in enumerate(sample[1:], start=2):
        # Same length thresholds as the sampling buckets: <1000, <5000, else long.
        bucket = "short" if len(doc) < 1000 else "medium" if len(doc) < 5000 else "long"
        print(f"[{i:2d}/{len(sample)}] [{bucket:6s}] [{len(doc):6d}c] {source[:50]:50s}", end=" ", flush=True)
        result = submit_episode(source, doc)
        results.append(result)
        if result["error"]:
            print(f" ERROR: {result['error'][:80]}")
        else:
            print(f" {result['status_code']} {result['elapsed_s']}s")
    total_elapsed = time.time() - total_start
    successful = [r for r in results if r["error"] is None]
    failed = [r for r in results if r["error"] is not None]
    summary = {
        "sample_size": len(sample),
        "successful": len(successful),
        "failed": len(failed),
        "total_elapsed_s": round(total_elapsed, 1),
        # Mean over successful episodes only; max(..., 1) avoids dividing by zero.
        "mean_elapsed_per_episode_s": round(
            sum(r["elapsed_s"] for r in successful) / max(len(successful), 1), 2
        ),
        "by_bucket": {},
        "results": results,
    }
    # Per-bucket stats are keyed on the full document length, not the truncated length sent.
    for bname, lo, hi in [("short", 0, 1000), ("medium", 1000, 5000), ("long", 5000, 10**9)]:
        b = [r for r in successful if lo <= r["doc_chars"] < hi]
        if b:
            summary["by_bucket"][bname] = {
                "n": len(b),
                "mean_elapsed_s": round(sum(r["elapsed_s"] for r in b) / len(b), 2),
                "mean_chars": int(sum(r["doc_chars"] for r in b) / len(b)),
            }
    # Corpus size is needed to extrapolate the per-episode mean to a full migration.
    conn = psycopg2.connect(PG_DSN)
    cur = conn.cursor()
    cur.execute("SELECT COUNT(DISTINCT source) FROM embeddings")
    total_sources = cur.fetchone()[0]
    cur.close(); conn.close()
    summary["total_corpus_sources"] = total_sources
    # Sequential estimate: sources * mean seconds per episode, converted to hours.
    summary["estimated_migration_hours"] = round(
        total_sources * summary["mean_elapsed_per_episode_s"] / 3600, 1
    )
    OUT.write_text(json.dumps(summary, indent=2))
    print()
    print("=" * 60)
    print("RESULTS")
    print("=" * 60)
    print(f"Sample: {summary['successful']}/{summary['sample_size']} succeeded, {summary['failed']} failed")
    print(f"Total elapsed: {summary['total_elapsed_s']}s")
    print(f"Mean per episode: {summary['mean_elapsed_per_episode_s']}s")
    for bucket, stats in summary["by_bucket"].items():
        print(f" {bucket:6s} n={stats['n']:3d} chars~{stats['mean_chars']:6d} elapsed~{stats['mean_elapsed_s']}s")
    print()
    print(f"Total corpus sources: {summary['total_corpus_sources']}")
    print(f"Estimated migration runtime: {summary['estimated_migration_hours']} hours")
    print()
    print("AFTER:")
    print(" Wait 5 min; note new Anthropic spend; subtract.")
    print(f" test_cost / {summary['successful']} = per-episode cost")
    print(f" per-episode * {summary['total_corpus_sources']} = full migration estimate")
    print()
    print(f"Full results: {OUT}")


if __name__ == "__main__":
    main()
+155
View File
@@ -0,0 +1,155 @@
"""
E1.4 per-source predicate diversity comparison — fixed version.
Looks up episode uuids by name in both production and cascade graphs.
"""
import json
from collections import defaultdict
from falkordb import FalkorDB
E14_RESULTS = "/home/aaron/aaronai/experiments/e14_cascade_results.json"
PRODUCTION_GROUP = "aaron"
CASCADE_GROUP = "aaron_cascade_e14"
def get_predicates_for_episode(graph, episode_uuid):
    """Count the distinct ``RELATES_TO`` edge names that list the given episode.

    Queries ``graph`` for edges whose ``episodes`` property contains
    ``episode_uuid`` and returns the first column of the first result row,
    or 0 when the result set is empty.
    """
    cypher = """
MATCH ()-[r:RELATES_TO]->()
WHERE $uuid IN r.episodes
RETURN count(DISTINCT r.name) AS predicate_count
"""
    records = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not records:
        return 0
    return records[0][0]
def get_edge_count_for_episode(graph, episode_uuid):
    """Count the ``RELATES_TO`` edges that list the given episode.

    Queries ``graph`` for edges whose ``episodes`` property contains
    ``episode_uuid`` and returns the first column of the first result row,
    or 0 when the result set is empty.
    """
    cypher = """
MATCH ()-[r:RELATES_TO]->()
WHERE $uuid IN r.episodes
RETURN count(r) AS edge_count
"""
    records = graph.query(cypher, {"uuid": episode_uuid}).result_set
    if not records:
        return 0
    return records[0][0]
def find_episode_uuid(graph, source_name):
    """Look up the uuid of an ``Episodic`` node by its ``name``.

    Returns the first column of the first result row (at most one row is
    requested), or ``None`` when no node matches.
    """
    cypher = """
MATCH (e:Episodic {name: $name})
RETURN e.uuid AS uuid
LIMIT 1
"""
    records = graph.query(cypher, {"name": source_name}).result_set
    if not records:
        return None
    return records[0][0]
def main():
    """Compare per-source predicate and edge counts between the two graphs.

    For every E1.4 result that has a ``submit_result``, looks up the episode
    by name in the production and cascade graphs, counts distinct predicates
    and edges for each, and prints per-source, per-bucket and aggregate
    comparisons. The per-source rows are saved as JSON.

    Sources missing from either graph are warned about and skipped.
    """
    def pct_change(before, after):
        # A zero baseline has no meaningful percentage; report 0 instead of
        # raising ZeroDivisionError (same convention for buckets and totals).
        return ((after - before) / before * 100) if before else 0

    db = FalkorDB(host='localhost', port=6379)
    prod_graph = db.select_graph(PRODUCTION_GROUP)
    cascade_graph = db.select_graph(CASCADE_GROUP)
    with open(E14_RESULTS) as f:
        e14 = json.load(f)
    sources = [r for r in e14['results'] if 'submit_result' in r]
    print(f"Analyzing {len(sources)} sources...")
    print()

    comparisons = []
    missing_prod = 0
    missing_cascade = 0
    for src in sources:
        name = src['name']
        bucket = src['bucket']
        prod_uuid = find_episode_uuid(prod_graph, name)
        cascade_uuid = find_episode_uuid(cascade_graph, name)
        if not prod_uuid:
            missing_prod += 1
            print(f" WARN: missing in production: {name}")
            continue
        if not cascade_uuid:
            missing_cascade += 1
            print(f" WARN: missing in cascade: {name}")
            continue
        prod_preds = get_predicates_for_episode(prod_graph, prod_uuid)
        cascade_preds = get_predicates_for_episode(cascade_graph, cascade_uuid)
        prod_edges = get_edge_count_for_episode(prod_graph, prod_uuid)
        cascade_edges = get_edge_count_for_episode(cascade_graph, cascade_uuid)
        comparisons.append({
            "name": name,
            "bucket": bucket,
            "prod_preds": prod_preds,
            "cascade_preds": cascade_preds,
            "delta_preds": cascade_preds - prod_preds,
            "prod_edges": prod_edges,
            "cascade_edges": cascade_edges,
            "delta_edges": cascade_edges - prod_edges,
        })

    if missing_prod or missing_cascade:
        print()
        print(f"Missing: {missing_prod} prod, {missing_cascade} cascade")
        print()
    if not comparisons:
        print("No comparable sources found. Aborting.")
        return

    # Per-source detail
    print(f"{'Bucket':<10} {'Source':<58} {'Preds A→B':<14} {'Δ':<6} {'Edges A→B':<14} {'Δ'}")
    print("-" * 115)
    for c in sorted(comparisons, key=lambda x: (x['bucket'], x['name'])):
        name_short = (c['name'][:55] + '..') if len(c['name']) > 58 else c['name']
        # Separate before/after with an arrow so e.g. 12 and 15 do not read as "1215".
        preds_str = f"{c['prod_preds']}→{c['cascade_preds']}"
        edges_str = f"{c['prod_edges']}→{c['cascade_edges']}"
        print(f"{c['bucket']:<10} {name_short:<58} {preds_str:<14} {c['delta_preds']:+d} {edges_str:<14} {c['delta_edges']:+d}")

    # Per-bucket aggregation
    print()
    print("=" * 115)
    print("PER-BUCKET AGGREGATION")
    print("=" * 115)
    by_bucket = defaultdict(list)
    for c in comparisons:
        by_bucket[c['bucket']].append(c)
    for bucket in ['high', 'mid', 'low', 'document']:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        sum_pp = sum(c['prod_preds'] for c in items)
        sum_cp = sum(c['cascade_preds'] for c in items)
        sum_pe = sum(c['prod_edges'] for c in items)
        sum_ce = sum(c['cascade_edges'] for c in items)
        positive = sum(1 for c in items if c['delta_preds'] > 0)
        negative = sum(1 for c in items if c['delta_preds'] < 0)
        flat = sum(1 for c in items if c['delta_preds'] == 0)
        pct_pred = pct_change(sum_pp, sum_cp)
        pct_edge = pct_change(sum_pe, sum_ce)
        print(f"\n{bucket.upper()} (n={n}):")
        print(f" Predicates: {sum_pp}→{sum_cp} ({pct_pred:+.1f}%)")
        print(f" Edges: {sum_pe}→{sum_ce} ({pct_edge:+.1f}%)")
        print(f" Outcomes: {positive} positive, {flat} flat, {negative} negative")

    # Aggregate
    print()
    print("=" * 115)
    print(f"AGGREGATE (n={len(comparisons)})")
    print("=" * 115)
    total_pp = sum(c['prod_preds'] for c in comparisons)
    total_cp = sum(c['cascade_preds'] for c in comparisons)
    total_pe = sum(c['prod_edges'] for c in comparisons)
    total_ce = sum(c['cascade_edges'] for c in comparisons)
    print(f" Predicates: {total_pp}→{total_cp} ({pct_change(total_pp, total_cp):+.1f}%)")
    print(f" Edges: {total_pe}→{total_ce} ({pct_change(total_pe, total_ce):+.1f}%)")

    out_path = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
    with open(out_path, "w") as f:
        json.dump(comparisons, f, indent=2)
    print()
    print(f"Saved to {out_path}")


if __name__ == "__main__":
    main()
+208
View File
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""E1.4 orchestration — cascade re-extraction at n=30, group_id=aaron_cascade_e14."""
import json
import os
import requests
import time
from pathlib import Path
import psycopg2
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "e14_sample.json"
RESULTS_FILE = EXPERIMENTS / "e14_cascade_results.json"
PG_DSN = os.environ["PG_DSN"]
SIDECAR_URL = "http://localhost:8001"
TEST_GROUP_ID = "aaron_cascade_e14"
MAX_DOC_CHARS = 12000
METADATA_PROMPT = """You are a metadata extraction system. Given a document, produce structural and content metadata in strict JSON format.
Do not summarize the content beyond the one-sentence summary field. Do not extract entities or relationships. Do not interpret meaning. Produce only the metadata schema below.
Output JSON only. No prose, no explanation, no markdown code fences.
Schema:
{
"language": "<ISO 639-1 code>",
"char_length": <integer>,
"primary_format": "<prose|slides|code|structured|mixed>",
"structural_signals": {
"has_headings": <boolean>,
"has_bullet_lists": <boolean>,
"has_numbered_lists": <boolean>,
"has_tables": <boolean>,
"has_code_blocks": <boolean>,
"has_dates": <boolean>
},
"content_signals": {
"has_named_people": <boolean>,
"has_institutional_language": <boolean>,
"has_technical_terminology": <boolean>,
"has_first_person": <boolean>,
"has_quotations": <boolean>
},
"domain_class": "<technical|administrative|educational|personal|conversational>",
"one_sentence_summary": "<one sentence describing what the document is about>"
}
Document:
"""
def get_pg():
    """Open and return a new psycopg2 connection using PG_DSN."""
    connection = psycopg2.connect(PG_DSN)
    return connection
def fetch_source_text(source):
    """Return the full text of *source*, or ``None`` when it has no chunks.

    Opens a fresh connection via ``get_pg``, concatenates the ``document``
    values of the matching ``embeddings`` rows in ``id`` order, and closes
    the cursor and connection even when the query raises.
    """
    conn = get_pg()
    try:
        cur = conn.cursor()
        try:
            cur.execute("""
SELECT STRING_AGG(document, E'\n\n' ORDER BY id) AS full_doc
FROM embeddings WHERE source = %s
""", (source,))
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        conn.close()
    # The aggregate is NULL when no row matches the source.
    if row is None or row[0] is None:
        return None
    return row[0]
def run_mistral_metadata(text, max_retries=2):
    """Ask the local Mistral model for metadata describing *text*.

    Sends METADATA_PROMPT plus the first MAX_DOC_CHARS characters of ``text``
    to the generate endpoint. Read timeouts and connection errors are
    retried, sleeping 5 seconds between attempts, for up to ``max_retries``
    attempts. Other request errors (such as an HTTP error status) propagate.

    Returns the parsed metadata dict with ``char_length`` overwritten by the
    length of the truncated text actually sent, or a dict with an ``error``
    key when every attempt failed, the response was not valid JSON, or the
    JSON was not an object.
    """
    truncated = text[:MAX_DOC_CHARS]
    prompt = METADATA_PROMPT + truncated
    last_err = None
    for attempt in range(max_retries):
        try:
            response = requests.post(
                "http://localhost:11434/api/generate",
                json={"model": "mistral:latest", "prompt": prompt, "stream": False, "format": "json"},
                timeout=300,
            )
            response.raise_for_status()
            raw = response.json()["response"]
        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
            last_err = e
            if attempt < max_retries - 1:
                print(f" (retry {attempt+1} after {type(e).__name__})", end=" ", flush=True)
                time.sleep(5)
            continue

        try:
            metadata = json.loads(raw)
        except json.JSONDecodeError:
            return {"error": "JSON parse failed", "raw": raw[:500]}
        if not isinstance(metadata, dict):
            # Valid JSON that is not an object (e.g. a list) cannot carry the
            # schema fields; report it instead of failing on item assignment.
            return {"error": "JSON was not an object", "raw": raw[:500]}
        # Trust the measured length over whatever the model reported.
        metadata["char_length"] = len(truncated)
        return metadata
    return {"error": f"After {max_retries} retries: {last_err}"}
def format_metadata_as_orientation(metadata):
    """Turn a metadata dict into a short orientation paragraph.

    Returns ``None`` when ``metadata`` contains an ``error`` key. Otherwise
    builds a sentence from ``domain_class`` and ``primary_format`` (each
    defaulting to "unknown") and ``one_sentence_summary`` (defaulting to an
    empty string), followed by fixed guidance text.
    """
    if "error" in metadata:
        return None

    domain = metadata.get("domain_class", "unknown")
    fmt = metadata.get("primary_format", "unknown")
    summary = metadata.get("one_sentence_summary", "")

    sentences = [
        f"This is a {domain} document in {fmt} format. ",
        f"Summary: {summary} ",
        "This metadata is provided to orient your extraction, not to constrain it. ",
        "Extract entities and relationships freely from the document text itself; ",
        "the metadata is descriptive context, not a checklist.",
    ]
    return "".join(sentences)
def submit_episode_singular(name, content, custom_instructions):
    """POST one episode to the sidecar's ``/episodes`` endpoint.

    The content is truncated to MAX_DOC_CHARS and tagged with TEST_GROUP_ID.
    Raises for a non-success HTTP status; otherwise returns the decoded JSON
    response body.
    """
    url = f"{SIDECAR_URL}/episodes"
    body = {
        "name": name,
        "content": content[:MAX_DOC_CHARS],
        "source_description": "e14_replication_run",
        "timestamp": "2026-04-29T00:00:00",
        "group_id": TEST_GROUP_ID,
        "custom_extraction_instructions": custom_instructions,
    }
    reply = requests.post(url, json=body, timeout=300)
    reply.raise_for_status()
    return reply.json()
def load_state():
    """Load prior results so an interrupted run can resume.

    Returns ``(results, completed)``: the list stored under ``results`` in
    RESULTS_FILE and the set of names whose record has a ``submit_result``.
    Both are empty when the file does not exist.
    """
    if not RESULTS_FILE.exists():
        return [], set()
    with open(RESULTS_FILE) as handle:
        saved = json.load(handle)
    records = saved.get("results", [])
    finished = {rec["name"] for rec in records if "submit_result" in rec}
    return records, finished
def main():
    """Run the cascade re-extraction over the selected sample, resumably.

    For each selected episode not already completed: fetch its text, generate
    metadata with ``run_mistral_metadata``, turn it into orientation text and
    submit the episode. Results are written to RESULTS_FILE after every
    source so an interrupted run can resume.

    On resume, records left by earlier failed attempts at a source are
    replaced by the new attempt rather than accumulating as duplicates.
    """
    with open(SAMPLE_FILE) as f:
        sample = json.load(f)
    selected = sample["selected"]
    results, completed = load_state()

    def save_results():
        # Checkpoint the whole result list; default=str covers values json
        # cannot serialize natively.
        with open(RESULTS_FILE, "w") as out:
            json.dump({"results": results}, out, indent=2, default=str)

    if completed:
        print(f"Resuming — {len(completed)} sources already completed, {len(selected) - len(completed)} remaining\n")
    else:
        print(f"E1.4 cascade replication — {len(selected)} episodes to group_id={TEST_GROUP_ID}\n")

    for i, ep in enumerate(selected, 1):
        name = ep["name"]
        bucket = ep["bucket"]
        if name in completed:
            print(f"[{i}/{len(selected)}] [{bucket}] {name} — SKIP (already completed)")
            continue
        print(f"[{i}/{len(selected)}] [{bucket}] {name}")

        # This source is being retried: discard its stale failed record(s).
        results[:] = [r for r in results if r.get("name") != name]

        record = {"name": name, "bucket": bucket, "tier1_entities": ep["entities"]}
        if ep.get("subtype"):
            record["subtype"] = ep["subtype"]

        print(f" Fetching source text...", end=" ", flush=True)
        text = fetch_source_text(name)
        if text is None:
            print("FAILED — no chunks in pgvector")
            record["error"] = "no source text"
            results.append(record)
            save_results()
            continue
        record["doc_chars"] = len(text)
        print(f"{len(text)} chars")

        print(f" Generating Mistral metadata...", end=" ", flush=True)
        t0 = time.time()
        metadata = run_mistral_metadata(text)
        elapsed = time.time() - t0
        record["metadata"] = metadata
        record["metadata_elapsed_s"] = round(elapsed, 1)
        if "error" in metadata:
            print(f"FAILED in {elapsed:.1f}s")
        else:
            print(f"{elapsed:.1f}s — domain={metadata.get('domain_class')}, format={metadata.get('primary_format')}")

        # None when metadata generation failed; the episode is still submitted.
        custom_instructions = format_metadata_as_orientation(metadata)
        record["custom_extraction_instructions"] = custom_instructions

        print(f" Submitting via /episodes...", end=" ", flush=True)
        t0 = time.time()
        try:
            result = submit_episode_singular(name, text, custom_instructions)
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — OK")
            record["submit_elapsed_s"] = round(elapsed, 1)
            record["submit_result"] = result
        except Exception as e:
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — FAILED: {e}")
            record["submit_error"] = str(e)

        results.append(record)
        save_results()
        print()

    print(f"\nDone. Results saved to {RESULTS_FILE}")


if __name__ == "__main__":
    main()
+160
View File
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""E1.4 sample selection — n=30 stratified, excluding E1's 10 sources."""
import json
import re
import subprocess
from pathlib import Path
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
E1_SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
OUTPUT = EXPERIMENTS / "e14_sample.json"
TARGETS = {"high": 8, "mid": 8, "low": 8, "document": 6}
def query_episode_counts():
    """Return ``[{"name": ..., "entities": ...}, ...]`` for every episode.

    Runs a Cypher query through ``redis-cli`` inside the ``falkordb``
    container and parses its plain-text output, which is read as alternating
    name / count lines after a two-line header.

    Raises:
        RuntimeError: if the ``docker exec`` command exits with a non-zero
            status, instead of silently returning an empty list.
    """
    query = ("MATCH (e:Episodic) OPTIONAL MATCH (e)-[r]-(n:Entity) "
             "RETURN e.name AS name, count(distinct n) AS entities "
             "ORDER BY entities DESC")
    result = subprocess.run(
        ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", "aaron", query],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"GRAPH.QUERY via docker exec failed (exit {result.returncode}): "
            f"{result.stderr.strip()[:500]}"
        )

    lines = [line for line in result.stdout.split("\n") if line.strip()]
    episodes = []
    i = 0
    while i < len(lines):
        # Skip the two header lines ("name", then the count column's label).
        if lines[i] == "name":
            i += 2
            continue
        # Trailing statistics lines mark the end of the data rows.
        if lines[i].startswith("Cached") or lines[i].startswith("Query"):
            break
        if i + 1 < len(lines):
            try:
                count = int(lines[i + 1])
                episodes.append({"name": lines[i], "entities": count})
                i += 2
            except ValueError:
                # Next line is not a count: resynchronize one line further on.
                i += 1
        else:
            i += 1
    return episodes
def is_document(name):
    """Return True when *name* ends with a known document extension (case-insensitive)."""
    extensions = (".pdf", ".docx", ".pptx", ".txt", ".md")
    return name.lower().endswith(extensions)
def doc_subtype(name):
    """Categorize document by likely subtype.

    Matches case-insensitive substrings of *name* against keyword groups in a
    fixed order and returns the label of the first group that matches:
    "academic", "reference" or "creative"; "other" when nothing matches.
    """
    lowered = name.lower()
    keyword_groups = (
        ("academic", ("syllabus", "ind study", "_is")),
        ("reference", ("annual", "report", "_ar20", "rtpcc", "novo")),
        ("reference", ("cv", "resume", "application", "cover letter")),
        ("creative", ("marquee", "pptx", "presentation")),
    )
    for label, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "other"
def main():
    """Select the stratified E1.4 sample and write it to OUTPUT.

    Episodes are split by entity count into high / mid / low buckets using
    quartile boundaries computed over all episodes. Document-named episodes
    with at least 5 entities go into a separate pool stratified by subtype.
    Sources already used by E1 are excluded. Stops early, with a message,
    when the graph query returned no episodes.
    """
    print("Fetching episode entity counts from Tier 1 graph...")
    episodes = query_episode_counts()
    print(f"Got {len(episodes)} episodes")
    if not episodes:
        # Quartile boundaries are undefined on an empty list.
        print("ERROR: no episodes returned; nothing to sample.")
        return

    # Load E1's sample to exclude
    with open(E1_SAMPLE_FILE) as f:
        e1_sample = json.load(f)
    e1_names = {ep["name"] for ep in e1_sample["selected"]}
    print(f"Excluding {len(e1_names)} sources from E1")

    # Quartile boundaries
    counts = sorted([e["entities"] for e in episodes], reverse=True)
    n = len(counts)
    top_q = counts[n // 4]
    bottom_q = counts[3 * n // 4]
    print(f"Quartile boundaries: top≥{top_q}, mid={bottom_q+1}-{top_q-1}, low≤{bottom_q}")

    # Filter out E1 and bucket
    available = [e for e in episodes if e["name"] not in e1_names]
    high = [e for e in available if e["entities"] >= top_q and not is_document(e["name"])]
    mid = [e for e in available if bottom_q < e["entities"] < top_q and not is_document(e["name"])]
    low = [e for e in available if e["entities"] <= bottom_q and not is_document(e["name"])]
    docs = [e for e in available if is_document(e["name"]) and e["entities"] >= 5]
    print(f"\nAvailable after E1 exclusion:")
    print(f" High-density: {len(high)}")
    print(f" Mid-density: {len(mid)}")
    print(f" Low-density: {len(low)}")
    print(f" Documents: {len(docs)}")

    # For high/mid/low: take from middle of bucket (avoids edge cases)
    def pick(bucket, want):
        if len(bucket) < want:
            print(f" WARNING: only {len(bucket)} available, asked for {want}")
            return bucket
        mid_idx = len(bucket) // 2
        start = max(0, mid_idx - want // 2)
        return bucket[start:start + want]

    selected = []
    for label, pool in (("high", high), ("mid", mid), ("low", low)):
        for ep in pick(pool, TARGETS[label]):
            ep["bucket"] = label
            selected.append(ep)

    # For documents: stratify by subtype, target 2 academic, 2 creative, 2 reference
    doc_targets = {"academic": 2, "creative": 2, "reference": 2}
    docs_by_subtype = {}
    for ep in docs:
        st = doc_subtype(ep["name"])
        ep["subtype"] = st
        docs_by_subtype.setdefault(st, []).append(ep)
    print(f"\n Doc subtypes available: {[(k, len(v)) for k, v in docs_by_subtype.items()]}")

    # Pick from middle of each subtype bucket
    for subtype, target in doc_targets.items():
        sub_docs = docs_by_subtype.get(subtype, [])
        for ep in pick(sub_docs, target):
            ep["bucket"] = "document"
            selected.append(ep)

    # If we're short on documents (e.g., subtype underrepresented), fill from "other"
    doc_count = sum(1 for s in selected if s.get("bucket") == "document")
    if doc_count < TARGETS["document"]:
        shortage = TARGETS["document"] - doc_count
        # Build the name set once rather than once per candidate.
        chosen_names = {s["name"] for s in selected}
        leftover = [e for e in docs if e["name"] not in chosen_names]
        for ep in leftover[:shortage]:
            ep["bucket"] = "document"
            ep["subtype"] = ep.get("subtype") or doc_subtype(ep["name"])
            selected.append(ep)

    print(f"\nSelected {len(selected)} episodes for E1.4:")
    for ep in selected:
        sub = f"/{ep.get('subtype')}" if ep.get('bucket') == 'document' else ""
        print(f" [{ep['bucket']}{sub:>10}] {ep['entities']:>3}e {ep['name']}")

    with open(OUTPUT, "w") as f:
        json.dump({
            "metadata": {
                "purpose": "E1.4 cascade re-extraction replication (n=30)",
                "exclusions": "E1's 10 sources",
                "stratification": {**TARGETS, "document_subtypes": doc_targets},
                "quartile_top": top_q,
                "quartile_bottom": bottom_q,
            },
            "selected": selected,
        }, f, indent=2)
    print(f"\nSaved to {OUTPUT}")


if __name__ == "__main__":
    main()
+246
View File
@@ -0,0 +1,246 @@
"""
E1.6 analysis — correlate domain-purity ratings with cascade outcomes.
Applies pre-registered decision rules from E1.6 protocol.
"""
import json
from collections import defaultdict
RATINGS_PATH = "/home/aaron/aaronai/experiments/e16_purity_ratings.json"
COMPARISON_PATH = "/home/aaron/aaronai/experiments/e14_per_source_comparison.json"
def spearman(xs, ys):
    """Compute Spearman rank correlation.

    Both sequences are converted to 1-based ranks, with tied values sharing
    the average of the positions they occupy; the result is the Pearson
    correlation of those ranks.

    Returns ``None`` when ``xs`` has fewer than two items or when either rank
    vector has zero variance.
    """
    n = len(xs)
    if n < 2:
        return None

    def rank(values):
        # Indices of `values` in ascending order of value.
        order = sorted(range(len(values)), key=lambda idx: values[idx])
        ranks = [0] * len(values)
        start = 0
        while start < len(values):
            # Extend `stop` across the run of values tied with order[start].
            stop = start
            while stop + 1 < len(values) and values[order[stop + 1]] == values[order[start]]:
                stop += 1
            shared = (start + stop) / 2 + 1
            for pos in range(start, stop + 1):
                ranks[order[pos]] = shared
            start = stop + 1
        return ranks

    rx = rank(xs)
    ry = rank(ys)
    mean_rx = sum(rx) / n
    mean_ry = sum(ry) / n
    dev_x = [rx[i] - mean_rx for i in range(n)]
    dev_y = [ry[i] - mean_ry for i in range(n)]

    num = sum(dev_x[i] * dev_y[i] for i in range(n))
    den_x = (sum(d ** 2 for d in dev_x)) ** 0.5
    den_y = (sum(d ** 2 for d in dev_y)) ** 0.5
    if den_x == 0 or den_y == 0:
        return None
    return num / (den_x * den_y)
def main():
    """Join purity ratings with cascade outcomes and print the E1.6 tests.

    Loads the ratings and the per-source comparison, joins them by source
    name, and prints: per-source detail, the primary binary-purity test with
    its decision rule, the secondary Spearman test, the tertiary
    within-bucket breakdown, and mean deltas per group. The joined rows are
    saved as JSON.

    An undefined correlation (``spearman`` returning ``None``) is printed as
    "n/a" rather than raising while formatting.
    """
    def fmt_rho(rho):
        # spearman() returns None for n < 2 or zero rank variance.
        return f"{rho:.3f}" if rho is not None else "n/a"

    def categorize_outcome(delta):
        if delta > 0:
            return 'positive'
        elif delta < 0:
            return 'negative'
        else:
            return 'flat'

    def rate_str(group):
        if not group:
            return ""
        pos = sum(1 for j in group if j['delta_preds'] > 0)
        return f"{pos}/{len(group)} positive ({pos/len(group)*100:.0f}%)"

    with open(RATINGS_PATH) as f:
        ratings_data = json.load(f)
    with open(COMPARISON_PATH) as f:
        comparisons = json.load(f)
    ratings_by_name = {r['name']: r for r in ratings_data['ratings']}
    comp_by_name = {c['name']: c for c in comparisons}

    # Join ratings with cascade outcomes
    joined = []
    for name, rating in ratings_by_name.items():
        if name in comp_by_name:
            comp = comp_by_name[name]
            joined.append({
                'name': name,
                'binary': rating['binary'],
                'score': rating['score'],
                'note': rating.get('note'),
                'bucket': comp['bucket'],
                'delta_preds': comp['delta_preds'],
                'delta_edges': comp['delta_edges'],
                'prod_preds': comp['prod_preds'],
                'cascade_preds': comp['cascade_preds'],
            })

    print("=" * 100)
    print(f"E1.6 ANALYSIS — Domain Purity vs Cascade Outcome (n={len(joined)})")
    print("=" * 100)

    # Per-source detail with rating
    print()
    print(f"{'Bucket':<10} {'Source':<48} {'Domain':<8} {'Score':<6} {'Δpreds':<8} {'Δedges':<8}")
    print("-" * 100)
    for j in sorted(joined, key=lambda x: (x['binary'], -x['score'], x['bucket'], x['name'])):
        name_short = (j['name'][:45] + '..') if len(j['name']) > 48 else j['name']
        print(f"{j['bucket']:<10} {name_short:<48} {j['binary']:<8} {j['score']:<6} {j['delta_preds']:+d} {j['delta_edges']:+d}")

    # PRIMARY TEST: binary purity vs cascade outcome distribution
    print()
    print("=" * 100)
    print("PRIMARY TEST: Binary purity vs cascade outcome distribution")
    print("=" * 100)
    by_binary = defaultdict(lambda: {'positive': 0, 'flat': 0, 'negative': 0, 'total': 0})
    for j in joined:
        outcome = categorize_outcome(j['delta_preds'])
        by_binary[j['binary']][outcome] += 1
        by_binary[j['binary']]['total'] += 1
    print()
    print(f"{'Group':<15} {'n':<5} {'Positive':<12} {'Flat':<10} {'Negative':<12}")
    print("-" * 60)
    for binary in ['single', 'multi']:
        d = by_binary[binary]
        n = d['total']
        if n == 0:
            continue
        pos_pct = d['positive'] / n * 100
        flat_pct = d['flat'] / n * 100
        neg_pct = d['negative'] / n * 100
        print(f"{binary+'-domain':<15} {n:<5} {d['positive']} ({pos_pct:.0f}%) {d['flat']} ({flat_pct:.0f}%) {d['negative']} ({neg_pct:.0f}%)")

    # Compute the gap
    if by_binary['single']['total'] > 0 and by_binary['multi']['total'] > 0:
        single_pos_rate = by_binary['single']['positive'] / by_binary['single']['total'] * 100
        multi_pos_rate = by_binary['multi']['positive'] / by_binary['multi']['total'] * 100
        gap = single_pos_rate - multi_pos_rate
        print()
        print(f"Cascade-positive rate gap (single - multi): {gap:+.1f} percentage points")
        print()
        # Apply pre-registered decision rule
        if gap >= 20:
            verdict = "NARROWNESS HYPOTHESIS SUPPORTED"
            detail = f"Single-domain content is {gap:.0f}pp more likely to gain from cascade than multi-domain."
        elif gap <= -20:
            verdict = "REVERSE OF HYPOTHESIS"
            detail = "Multi-domain content unexpectedly benefits more (counter to prediction)."
        elif abs(gap) < 10:
            verdict = "HYPOTHESIS NOT SUPPORTED"
            detail = "Domain purity does not appear to predict cascade outcome."
        else:
            verdict = "INCONCLUSIVE"
            detail = f"Gap of {gap:+.0f}pp is suggestive but below the pre-registered 20pp threshold."
        print(f" Pre-registered decision rule: {verdict}")
        print(f" {detail}")

    # SECONDARY TEST: Spearman correlation between purity score and predicate delta
    print()
    print("=" * 100)
    print("SECONDARY TEST: Spearman rank correlation (purity score vs predicate delta)")
    print("=" * 100)
    scores = [j['score'] for j in joined]
    deltas_pred = [j['delta_preds'] for j in joined]
    deltas_edge = [j['delta_edges'] for j in joined]
    rho_pred = spearman(scores, deltas_pred)
    rho_edge = spearman(scores, deltas_edge)
    print()
    print(f" Spearman ρ (purity score vs Δpredicates): {fmt_rho(rho_pred)}")
    print(f" Spearman ρ (purity score vs Δedges): {fmt_rho(rho_edge)}")
    print()
    if rho_pred is not None:
        if rho_pred >= 0.4:
            v = "STRONG POSITIVE — narrowness hypothesis supported with monotonic relationship"
        elif rho_pred >= 0.2:
            v = "WEAK POSITIVE — consistent with hypothesis but not strong evidence"
        elif rho_pred <= -0.2:
            v = "NEGATIVE — refutes hypothesis"
        else:
            v = "NO CORRELATION — hypothesis not supported"
        print(f" Predicate delta verdict: {v}")
        print()

    # TERTIARY TEST: within-bucket correlation
    print()
    print("=" * 100)
    print("TERTIARY TEST: Within-bucket correlation")
    print("=" * 100)
    by_bucket = defaultdict(list)
    for j in joined:
        by_bucket[j['bucket']].append(j)
    print()
    print(f"{'Bucket':<12} {'n':<5} {'Single':<10} {'Multi':<10} {'ρ (score vs Δpred)':<22}")
    print("-" * 75)
    for bucket in ['high', 'mid', 'low', 'document']:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        n = len(items)
        n_single = sum(1 for j in items if j['binary'] == 'single')
        n_multi = sum(1 for j in items if j['binary'] == 'multi')
        if n >= 3:
            scores_b = [j['score'] for j in items]
            deltas_b = [j['delta_preds'] for j in items]
            rho_b = spearman(scores_b, deltas_b)
            rho_str = f"{rho_b:+.3f}" if rho_b is not None else "n/a (no variance)"
        else:
            rho_str = "n/a (too few)"
        print(f"{bucket:<12} {n:<5} {n_single:<10} {n_multi:<10} {rho_str}")

    # Interaction with bucket: do single/multi outcomes differ within bucket?
    print()
    print("Per-bucket cascade-positive rate by binary purity:")
    print()
    print(f"{'Bucket':<12} {'Single':<25} {'Multi':<25}")
    print("-" * 65)
    for bucket in ['high', 'mid', 'low', 'document']:
        items = by_bucket.get(bucket, [])
        if not items:
            continue
        single_items = [j for j in items if j['binary'] == 'single']
        multi_items = [j for j in items if j['binary'] == 'multi']
        print(f"{bucket:<12} {rate_str(single_items):<25} {rate_str(multi_items):<25}")

    # MEAN DELTA by binary group
    print()
    print("=" * 100)
    print("MEAN PREDICATE DELTA BY GROUP")
    print("=" * 100)
    print()
    for binary in ['single', 'multi']:
        items = [j for j in joined if j['binary'] == binary]
        if not items:
            continue
        n = len(items)
        mean_dp = sum(j['delta_preds'] for j in items) / n
        mean_de = sum(j['delta_edges'] for j in items) / n
        sum_pp = sum(j['prod_preds'] for j in items)
        sum_cp = sum(j['cascade_preds'] for j in items)
        pct_change = (sum_cp - sum_pp) / sum_pp * 100 if sum_pp else 0
        print(f"{binary}-domain (n={n}):")
        print(f" Mean Δpredicates per source: {mean_dp:+.2f}")
        print(f" Mean Δedges per source: {mean_de:+.2f}")
        # Arrow separates before/after so the two totals do not run together.
        print(f" Aggregate predicate change: {sum_pp}→{sum_cp} ({pct_change:+.1f}%)")
        print()

    # Save joined data for the experiments log writeup
    out_path = "/home/aaron/aaronai/experiments/e16_joined_analysis.json"
    with open(out_path, "w") as f:
        json.dump(joined, f, indent=2)
    print(f"Joined data saved to {out_path}")


if __name__ == "__main__":
    main()
+206
View File
@@ -0,0 +1,206 @@
"""
E1.6 domain-purity rating interface — with full metadata context.
"""
import json
import os
import random
E14_RESULTS = "/home/aaron/aaronai/experiments/e14_cascade_results.json"
RATINGS_OUT = "/home/aaron/aaronai/experiments/e16_purity_ratings.json"
INTRO = """
================================================================================
E1.6 — DOMAIN-PURITY RATING
================================================================================
Two ratings per source:
1. BINARY — single-domain (s) or multi-domain (m)?
Mental test: "If Mistral had to pick ONE domain class for this source,
would picking just one significantly UNDER-DESCRIBE the content?"
YES → MULTI-DOMAIN (m) — content lives across two+ frames meaningfully
NO → SINGLE-DOMAIN (s) — content fits cleanly within one frame
2. SCORE (1-5) — how cleanly does it fit?
5 = unambiguously one domain
4 = primarily one domain, slight other element
3 = balanced two-domain
2 = primarily two-domain with traces of a third
1 = three or more domain frames weighted significantly
Single binary usually = score 4-5
Multi binary usually = score 1-3
You see for each source: name, length, AND the full Mistral metadata block
(domain_class, primary_format, structural_signals, content_signals, summary).
Blind to: bucket assignment, cascade outcome.
Commands at any prompt: 's', 'm', 'skip', 'quit'
================================================================================
""".strip()
def load_existing():
    """Return the saved rating state, or a fresh empty state.

    Reads RATINGS_OUT as JSON when that file exists; otherwise returns a new
    dict with empty "ratings" and "completed_names" lists.
    """
    if not os.path.exists(RATINGS_OUT):
        return {"ratings": [], "completed_names": []}
    with open(RATINGS_OUT) as handle:
        state = json.load(handle)
    return state
def save(data):
    """Overwrite RATINGS_OUT with *data* serialized as 2-space-indented JSON."""
    with open(RATINGS_OUT, "w") as handle:
        json.dump(data, handle, indent=2)
def render_metadata(metadata):
    """Print the Mistral metadata block in a stable, readable layout.

    Non-dict input and dicts carrying an 'error' key each produce a single
    placeholder line. Otherwise the well-known fields are printed first, in a
    fixed order, followed by every remaining key except 'char_length'.
    """
    if not isinstance(metadata, dict):
        print(" (metadata unavailable)")
        return
    if 'error' in metadata:
        print(f" (metadata error: {metadata['error']})")
        return

    # Fields shown first, always in this order.
    known_fields = (
        'domain_class',
        'primary_format',
        'structural_signals',
        'content_signals',
        'summary',
    )
    for key in known_fields:
        if key not in metadata:
            continue
        value = metadata[key]
        label = key.replace('_', ' ').title()
        if isinstance(value, list):
            if not value:
                print(f" {label}: (none)")
                continue
            print(f" {label}:")
            for item in value:
                print(f" - {item}")
        elif isinstance(value, str) and len(value) > 70:
            # Long strings go on their own line below the label.
            print(f" {label}:")
            print(f" {value}")
        else:
            print(f" {label}: {value}")

    # Anything else the model returned, in dict order.
    for key, value in metadata.items():
        if key in known_fields or key == 'char_length':
            continue
        print(f" {key.replace('_', ' ').title()}: {value}")
def render_source(src, idx, total):
    """Print the header, name, length and metadata for one source.

    Args:
        src: record with 'name' and 'doc_chars' keys and an optional 'metadata' dict.
        idx: 1-based position shown in the header.
        total: total number of sources shown in the header.
    """
    heavy_rule = "=" * 80
    print()
    print(heavy_rule)
    print(f" Source {idx}/{total}")
    print(heavy_rule)
    print(f"Name: {src['name']}")
    print(f"Length: {src['doc_chars']:,} chars")
    print()
    print("Mistral metadata:")
    print()
    metadata = src.get('metadata', {})
    render_metadata(metadata)
    print()
    print("-" * 80)
def get_rating():
    """Interactively collect one source's rating from stdin.

    Asks for the binary single/multi choice, then a 1-5 purity score, then an
    optional free-text note. 'quit' and 'skip' are honored at both the binary
    and the score prompt, matching the "commands at any prompt" promise in
    the intro text.

    Returns:
        'quit' if the rater asked to stop, None if the source was skipped,
        otherwise a dict with keys "binary" ("single" or "multi"),
        "score" (int 1-5) and "note" (str, or None when left blank).
    """
    while True:
        binary = input("Single-domain or multi-domain? [s/m/skip/quit]: ").strip().lower()
        if binary in ('s', 'm', 'skip', 'quit'):
            break
        print(" Please enter 's', 'm', 'skip', or 'quit'")
    if binary == 'quit':
        return 'quit'
    if binary == 'skip':
        return None

    while True:
        score_input = input("Purity score (1=many frames, 5=clearly single): ").strip()
        command = score_input.lower()
        if command == 'quit':
            return 'quit'
        if command == 'skip':
            # Previously rejected as "not a number", leaving no way to back out here.
            return None
        try:
            score = int(score_input)
        except ValueError:
            print(" Please enter a number 1-5 (or 'skip' / 'quit')")
            continue
        if 1 <= score <= 5:
            break
        print(" Score must be 1-5")

    note = input("Optional note (Enter to skip): ").strip()
    return {
        "binary": "single" if binary == 's' else "multi",
        "score": score,
        "note": note if note else None,
    }
def main():
    """Run the interactive purity-rating session.

    Loads E14_RESULTS, keeps the records that have a 'submit_result' key,
    shuffles them with a seeded RNG, and prompts for every source whose name
    is not already in the saved state. State is written after each recorded
    rating and again on quit or interrupt. Skipped sources are not marked
    complete.
    """
    with open(E14_RESULTS) as handle:
        e14 = json.load(handle)
    sources = [rec for rec in e14['results'] if 'submit_result' in rec]

    # Seeded RNG: the presentation order is identical on every run.
    shuffled = list(sources)
    random.Random(42).shuffle(shuffled)

    state = load_existing()
    completed = set(state['completed_names'])
    remaining = [src for src in shuffled if src['name'] not in completed]

    print(INTRO)
    print()
    print(f"Total sources: {len(sources)}")
    print(f"Already rated: {len(completed)}")
    print(f"Remaining: {len(remaining)}")
    print()
    if not remaining:
        print("All sources rated. Run analysis script next.")
        return
    input("Press Enter to begin...")

    total = len(sources)
    position = len(completed)
    try:
        for src in remaining:
            position += 1
            render_source(src, position, total)
            try:
                rating = get_rating()
            except (KeyboardInterrupt, EOFError):
                print("\n\nSaving and exiting...")
                save(state)
                return
            if rating == 'quit':
                print("\nSaving and exiting...")
                save(state)
                return
            if rating is None:
                print(" Skipped")
                continue
            source_name = src['name']
            rating['name'] = source_name
            state['ratings'].append(rating)
            state['completed_names'].append(source_name)
            save(state)
            print(f" Recorded: {rating['binary']}-domain, score={rating['score']}")
        print()
        print("=" * 80)
        print(f"Done. Rated {len(state['ratings'])} sources.")
        print(f"Saved to {RATINGS_OUT}")
    except (KeyboardInterrupt, EOFError):
        # Interrupt arrived outside the rating prompt (e.g. while rendering).
        print("\n\nSaving...")
        save(state)


if __name__ == "__main__":
    main()
+134
View File
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""E1 metrics comparison — A (Tier 1 aaron) vs B (cascade aaron_cascade_test) on the 10 sample sources."""
import json
import subprocess
from pathlib import Path
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
COMPARISON_FILE = EXPERIMENTS / "cascade_reextract_comparison.json"
def query(group_id, cypher):
    """Run *cypher* on graph *group_id* through redis-cli in the `falkordb` container.

    Returns the captured stdout text. The exit status and stderr are not inspected.
    """
    command = ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", group_id, cypher]
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout
def parse_int_result(output):
    """Return the first all-digit line of redis-cli GRAPH.QUERY output as an int.

    Lines are stripped before testing; 0 is returned when no such line exists.
    """
    for raw_line in output.split("\n"):
        token = raw_line.strip()
        if token.isdigit():
            return int(token)
    return 0
def parse_string_list(output):
    """Collect the data lines of a redis-cli result, dropping header and timing lines.

    The first line that is shorter than 60 characters and contains none of
    "{}[]" is treated as the column header and discarded; every later line is
    kept until one starting with "Cached" or "Query internal" is reached.
    """
    collected = []
    header_seen = False
    for raw_line in output.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(("Cached", "Query internal")):
            break
        if header_seen:
            collected.append(line)
        elif len(line) < 60 and not any(ch in line for ch in "{}[]"):
            # Column names are unknown up front, so the header is guessed by shape.
            header_seen = True
    return collected
def metrics_for_source(group_id, source_name):
    """Get graph metrics for one source's episode in one group_id.

    Args:
        group_id: FalkorDB graph name to query.
        source_name: value matched against the Episodic node's `name`.

    Returns:
        dict with int values under "entities", "edges" and "rel_types".
        Each value falls back to 0 when the query output has no integer line.
    """
    # The name is embedded in a double-quoted Cypher literal, so escape
    # backslashes first and then double quotes. A raw quote in a file name
    # would otherwise terminate the literal and break (or alter) the query.
    safe_name = source_name.replace("\\", "\\\\").replace('"', '\\"')
    episode = f'(e:Episodic {{name: "{safe_name}"}})'

    # Total entities connected to this episode
    q = f'MATCH {episode}-[]-(n:Entity) RETURN count(distinct n) AS entities'
    entities = parse_int_result(query(group_id, q))

    # Total edges from this episode (all relationship types)
    q = f'MATCH {episode}-[r]-() RETURN count(r) AS edges'
    edges = parse_int_result(query(group_id, q))

    # Distinct relationship types on edges touching this episode's entities.
    # NOTE(review): this counts type(r), not a per-edge name property; confirm
    # that is the intended "predicate" notion.
    q = (f'MATCH {episode}-[]-(n:Entity)-[r]-() '
         f'RETURN count(distinct type(r)) AS types')
    rel_types = parse_int_result(query(group_id, q))

    return {"entities": entities, "edges": edges, "rel_types": rel_types}
def main():
    """Compare per-source graph metrics between groups A and B and save them.

    A is the "aaron" graph and B is "aaron_cascade_test". For every sample
    entry the entity, edge and relationship-type counts are printed side by
    side, followed by aggregate and per-bucket summaries. All records are
    written to COMPARISON_FILE.

    Percent deltas print "n/a" when the baseline (A) value is zero, instead of
    raising ZeroDivisionError. An empty sample ends the run after the table header.
    """
    def pct(new, old, spec):
        """Format the percent change from *old* to *new*; "n/a" when *old* is zero."""
        if not old:
            return "n/a"
        return f"{format((new - old) / old * 100, spec)}%"

    with open(SAMPLE_FILE) as f:
        sample = json.load(f)
    selected = sample["selected"]

    print(f"E1 metrics comparison — {len(selected)} sources, A=aaron vs B=aaron_cascade_test\n")
    print(f"{'Source':<60} {'A.ent':>6} {'B.ent':>6} {'A.edg':>6} {'B.edg':>6} {'A.typ':>6} {'B.typ':>6}")
    print("-" * 110)

    results = []
    for ep in selected:
        name = ep["name"]
        bucket = ep["bucket"]
        a = metrics_for_source("aaron", name)
        b = metrics_for_source("aaron_cascade_test", name)
        record = {
            "name": name, "bucket": bucket,
            "a_entities": a["entities"], "b_entities": b["entities"],
            "a_edges": a["edges"], "b_edges": b["edges"],
            "a_rel_types": a["rel_types"], "b_rel_types": b["rel_types"],
        }
        results.append(record)
        # Truncate name for display
        display_name = name if len(name) <= 58 else name[:55] + "..."
        print(f"{display_name:<60} {a['entities']:>6} {b['entities']:>6} {a['edges']:>6} {b['edges']:>6} {a['rel_types']:>6} {b['rel_types']:>6}")

    n = len(results)
    if n == 0:
        # The means below divide by n; nothing to aggregate or save.
        print("\nNo sources in sample — nothing to aggregate.")
        return

    # Aggregates
    print("\n" + "=" * 110)
    a_ent_sum = sum(r["a_entities"] for r in results)
    b_ent_sum = sum(r["b_entities"] for r in results)
    a_edge_sum = sum(r["a_edges"] for r in results)
    b_edge_sum = sum(r["b_edges"] for r in results)
    a_types_sum = sum(r["a_rel_types"] for r in results)
    b_types_sum = sum(r["b_rel_types"] for r in results)
    print(f"\nAggregate (n={n}):")
    print(f" Entities: A mean={a_ent_sum/n:.1f} B mean={b_ent_sum/n:.1f} delta={pct(b_ent_sum, a_ent_sum, '+.1f')}")
    print(f" Edges: A mean={a_edge_sum/n:.1f} B mean={b_edge_sum/n:.1f} delta={pct(b_edge_sum, a_edge_sum, '+.1f')}")
    print(f" Rel types: A mean={a_types_sum/n:.1f} B mean={b_types_sum/n:.1f} delta={pct(b_types_sum, a_types_sum, '+.1f')}")

    # Global predicate diversity check (unique types in each group_id)
    print(f"\nGlobal predicate diversity:")
    a_global = parse_int_result(query("aaron", "MATCH ()-[r]-() RETURN count(distinct type(r)) AS t"))
    b_global = parse_int_result(query("aaron_cascade_test", "MATCH ()-[r]-() RETURN count(distinct type(r)) AS t"))
    print(f" A (aaron): {a_global} distinct relationship types across whole graph")
    print(f" B (aaron_cascade_test): {b_global} distinct relationship types across whole graph")

    # Per-bucket
    print(f"\nPer-bucket aggregates:")
    for bucket in ["high", "mid", "low", "document"]:
        bucket_results = [r for r in results if r["bucket"] == bucket]
        if not bucket_results:
            continue
        bn = len(bucket_results)
        a_e = sum(r["a_entities"] for r in bucket_results) / bn
        b_e = sum(r["b_entities"] for r in bucket_results) / bn
        a_ed = sum(r["a_edges"] for r in bucket_results) / bn
        b_ed = sum(r["b_edges"] for r in bucket_results) / bn
        print(f" [{bucket:>8}] n={bn} A.ent={a_e:.1f} B.ent={b_e:.1f} ({pct(b_e, a_e, '+.0f')}) "
              f"A.edg={a_ed:.1f} B.edg={b_ed:.1f} ({pct(b_ed, a_ed, '+.0f')})")

    with open(COMPARISON_FILE, "w") as f:
        json.dump({
            "results": results,
            "aggregate": {
                "a_entities_total": a_ent_sum, "b_entities_total": b_ent_sum,
                "a_edges_total": a_edge_sum, "b_edges_total": b_edge_sum,
                "global_predicate_diversity": {"a": a_global, "b": b_global},
            },
        }, f, indent=2)
    print(f"\nSaved to {COMPARISON_FILE}")


if __name__ == "__main__":
    main()
+115
View File
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""E1 corrected metric — count distinct predicate names on edges originating from each episode."""
import json
import subprocess
from pathlib import Path
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
def query(group_id, cypher):
    """Execute *cypher* against graph *group_id* via `docker exec falkordb redis-cli`.

    Returns whatever redis-cli wrote to stdout, as text.
    """
    argv = ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", group_id, cypher]
    proc = subprocess.run(argv, capture_output=True, text=True)
    return proc.stdout
def get_episode_uuid(group_id, episode_name):
    """Return the uuid of the Episodic node named *episode_name*, or None.

    The first output line that is 36 characters long and contains exactly
    four hyphens is taken to be the UUID.
    """
    # Single quotes are backslash-escaped because the name sits in a '...' literal.
    escaped = episode_name.replace("'", "\\'")
    statement = f"MATCH (e:Episodic) WHERE e.name = '{escaped}' RETURN e.uuid LIMIT 1"
    stripped = (raw.strip() for raw in query(group_id, statement).split("\n"))
    return next(
        (tok for tok in stripped if tok and len(tok) == 36 and tok.count("-") == 4),
        None,
    )
def count_predicates_for_episode(group_id, uuid):
    """Count distinct `r.name` values on RELATES_TO edges whose `episodes` list has *uuid*.

    Returns 0 when the query output contains no all-digit line.
    """
    statement = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(distinct r.name) AS p"
    tokens = (raw.strip() for raw in query(group_id, statement).split("\n"))
    return next((int(tok) for tok in tokens if tok.isdigit()), 0)
def count_total_edges_for_episode(group_id, uuid):
    """Count RELATES_TO edges whose `episodes` list contains *uuid*.

    Returns 0 when the query output contains no all-digit line.
    """
    statement = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(r) AS n"
    tokens = (raw.strip() for raw in query(group_id, statement).split("\n"))
    return next((int(tok) for tok in tokens if tok.isdigit()), 0)
# Load the E1 sample and compare, per episode, the edge and distinct-predicate
# counts between graph A ("aaron") and graph B ("aaron_cascade_test").
with open(SAMPLE_FILE) as sample_handle:
    sample = json.load(sample_handle)
selected = sample["selected"]

print("E1 corrected per-source comparison — predicates per episode by edge origin\n")
print(f"{'Source':<60} {'A.edges':>8} {'A.preds':>8} {'B.edges':>8} {'B.preds':>8}")
print("-" * 100)

records = []
for ep in selected:
    name = ep["name"]
    a_uuid = get_episode_uuid("aaron", name)
    b_uuid = get_episode_uuid("aaron_cascade_test", name)
    # An episode missing from a graph contributes zeros for that side.
    a_edges = count_total_edges_for_episode("aaron", a_uuid) if a_uuid else 0
    a_preds = count_predicates_for_episode("aaron", a_uuid) if a_uuid else 0
    b_edges = count_total_edges_for_episode("aaron_cascade_test", b_uuid) if b_uuid else 0
    b_preds = count_predicates_for_episode("aaron_cascade_test", b_uuid) if b_uuid else 0

    display = name if len(name) <= 58 else name[:55] + "..."
    print(f"{display:<60} {a_edges:>8} {a_preds:>8} {b_edges:>8} {b_preds:>8}")
    records.append({
        "name": name, "bucket": ep["bucket"],
        "a_edges": a_edges, "a_preds": a_preds,
        "b_edges": b_edges, "b_preds": b_preds,
    })

a_pred_total = sum(rec["a_preds"] for rec in records)
b_pred_total = sum(rec["b_preds"] for rec in records)
a_edge_total = sum(rec["a_edges"] for rec in records)
b_edge_total = sum(rec["b_edges"] for rec in records)

print("-" * 100)
n = len(selected)
print(f"\nAggregate (n={n}):")
print(f" Edges: A total={a_edge_total} mean={a_edge_total/n:.1f} B total={b_edge_total} mean={b_edge_total/n:.1f}")
print(f" Predicates: A total={a_pred_total} mean={a_pred_total/n:.1f} B total={b_pred_total} mean={b_pred_total/n:.1f}")
if a_pred_total > 0:
    print(f" Predicate delta: B vs A = {(b_pred_total-a_pred_total)/a_pred_total*100:+.1f}%")
if a_edge_total > 0:
    print(f" Edge delta: B vs A = {(b_edge_total-a_edge_total)/a_edge_total*100:+.1f}%")

# Per-bucket
print("\nPer-bucket:")
for bucket in ("high", "mid", "low", "document"):
    bucket_records = [rec for rec in records if rec["bucket"] == bucket]
    if not bucket_records:
        continue
    bn = len(bucket_records)
    a_p = sum(rec["a_preds"] for rec in bucket_records)
    b_p = sum(rec["b_preds"] for rec in bucket_records)
    a_e = sum(rec["a_edges"] for rec in bucket_records)
    b_e = sum(rec["b_edges"] for rec in bucket_records)
    delta = ((b_p-a_p)/a_p*100) if a_p > 0 else 0
    print(f" [{bucket:>8}] n={bn} A.preds={a_p:>3} B.preds={b_p:>3} ({delta:+.0f}%) A.edges={a_e:>3} B.edges={b_e:>3}")

corrected_path = EXPERIMENTS / "cascade_reextract_corrected_comparison.json"
with open(corrected_path, "w") as out_handle:
    json.dump({"per_source": records,
               "aggregate": {"a_preds": a_pred_total, "b_preds": b_pred_total,
                             "a_edges": a_edge_total, "b_edges": b_edge_total}}, out_handle, indent=2)
print(f"\nSaved to {corrected_path}")
+190
View File
@@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""E1 orchestration — fetch source text, run Mistral metadata, submit to Graphiti test group_id."""
import json
import os
import requests
import subprocess
import time
from pathlib import Path
import psycopg2
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
RESULTS_FILE = EXPERIMENTS / "cascade_reextract_results.json"
PG_DSN = os.environ["PG_DSN"]
SIDECAR_URL = "http://localhost:8001"
TEST_GROUP_ID = "aaron_cascade_test"
MAX_DOC_CHARS = 12000 # Same cap as Tier 1 for parity
# Stage 2 metadata prompt — verbatim from stage-2-worker-spec.md
METADATA_PROMPT = """You are a metadata extraction system. Given a document, produce structural and content metadata in strict JSON format.
Do not summarize the content beyond the one-sentence summary field. Do not extract entities or relationships. Do not interpret meaning. Produce only the metadata schema below.
Output JSON only. No prose, no explanation, no markdown code fences.
Schema:
{
"language": "<ISO 639-1 code>",
"char_length": <integer>,
"primary_format": "<prose|slides|code|structured|mixed>",
"structural_signals": {
"has_headings": <boolean>,
"has_bullet_lists": <boolean>,
"has_numbered_lists": <boolean>,
"has_tables": <boolean>,
"has_code_blocks": <boolean>,
"has_dates": <boolean>
},
"content_signals": {
"has_named_people": <boolean>,
"has_institutional_language": <boolean>,
"has_technical_terminology": <boolean>,
"has_first_person": <boolean>,
"has_quotations": <boolean>
},
"domain_class": "<technical|administrative|educational|personal|conversational>",
"one_sentence_summary": "<one sentence describing what the document is about>"
}
Document:
"""
def get_pg():
    """Open and return a new psycopg2 connection using PG_DSN."""
    connection = psycopg2.connect(PG_DSN)
    return connection
def fetch_source_text(source):
    """Reassemble the full document from pgvector chunks, mirroring tier1_migration.py logic.

    Args:
        source: value matched against the `source` column of `embeddings`.

    Returns:
        The chunks' `document` text joined by blank lines in `id` order, or
        None when no chunk matches.
    """
    conn = get_pg()
    try:
        cur = conn.cursor()
        try:
            cur.execute("""
                SELECT STRING_AGG(document, E'\n\n' ORDER BY id) AS full_doc
                FROM embeddings WHERE source = %s
            """, (source,))
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Always release the connection, including when the query raises.
        conn.close()
    # STRING_AGG over zero rows yields a single row holding NULL.
    return row[0] if row is not None else None
def run_mistral_metadata(text):
    """Call local Mistral via Ollama for base-class metadata.

    Only the first MAX_DOC_CHARS characters of *text* are sent.

    Returns:
        The parsed metadata dict with "char_length" overwritten by the length
        of the text actually sent, or {"error": ..., "raw": ...} when the
        model output is not a JSON object.

    Raises:
        requests.HTTPError: when the Ollama endpoint returns an error status.
    """
    truncated = text[:MAX_DOC_CHARS]
    prompt = METADATA_PROMPT + truncated
    response = requests.post(
        "http://localhost:11434/api/generate",
        json={"model": "mistral:latest", "prompt": prompt, "stream": False, "format": "json"},
        timeout=180,
    )
    response.raise_for_status()
    raw = response.json()["response"]
    try:
        metadata = json.loads(raw)
    except json.JSONDecodeError:
        return {"error": "JSON parse failed", "raw": raw[:500]}
    if not isinstance(metadata, dict):
        # Valid JSON that is not an object (list, string, number) cannot carry
        # the schema; report it like a parse failure instead of crashing below.
        return {"error": "metadata is not a JSON object", "raw": raw[:500]}
    # Override char_length with python-computed value (per stage-2-worker-spec)
    metadata["char_length"] = len(truncated)
    return metadata
def format_metadata_as_orientation(metadata):
    """Turn base-class metadata into a Graphiti source_description string.

    A metadata dict carrying "error" yields a short failure label. Otherwise
    the domain, format and one-sentence summary are stated, followed by
    orient-not-bound framing. Missing domain/format read "unknown" and a
    missing summary is empty.
    """
    if "error" in metadata:
        return f"tier1_cascade_test (metadata generation failed: {metadata['error']})"
    domain = metadata.get("domain_class", "unknown")
    fmt = metadata.get("primary_format", "unknown")
    summary = metadata.get("one_sentence_summary", "")
    sentences = [
        f"This is a {domain} document in {fmt} format.",
        f"Summary: {summary}",
        "This metadata is provided to orient your extraction, not to constrain it.",
        "Extract entities and relationships freely from the document text itself;",
        "the metadata is descriptive context, not a checklist.",
    ]
    return " ".join(sentences)
def submit_episode(name, content, source_description):
    """POST one episode to the sidecar's /episodes/bulk endpoint under TEST_GROUP_ID.

    *content* is cut to MAX_DOC_CHARS before sending. Returns the decoded JSON
    response; raises requests.HTTPError on an error status.
    """
    episode = {
        "name": name,
        "content": content[:MAX_DOC_CHARS],
        "source_description": source_description,
        "timestamp": "2026-04-28T00:00:00",
    }
    body = {"episodes": [episode], "group_id": TEST_GROUP_ID}
    reply = requests.post(f"{SIDECAR_URL}/episodes/bulk", json=body, timeout=300)
    reply.raise_for_status()
    return reply.json()
def main():
    """Run the E1 cascade re-extraction over every episode in SAMPLE_FILE.

    For each sample entry: fetch the source text, generate Mistral metadata,
    format it as a source_description and submit the episode to the test
    group. Each step's outcome and timing is stored on a per-episode record.

    RESULTS_FILE is rewritten after every record, including episodes that
    fail at the fetch step, so a failure on the last episode is not lost.
    """
    with open(SAMPLE_FILE) as f:
        sample = json.load(f)
    selected = sample["selected"]
    print(f"E1 cascade re-extraction starting — {len(selected)} episodes to test group_id={TEST_GROUP_ID}\n")

    results = []

    def save_results():
        """Write everything collected so far to RESULTS_FILE."""
        with open(RESULTS_FILE, "w") as out:
            json.dump({"results": results}, out, indent=2, default=str)

    for i, ep in enumerate(selected, 1):
        name = ep["name"]
        bucket = ep["bucket"]
        print(f"[{i}/{len(selected)}] [{bucket}] {name}")
        record = {"name": name, "bucket": bucket, "tier1_entities": ep["entities"]}

        # Fetch text
        print(f" Fetching source text...", end=" ", flush=True)
        text = fetch_source_text(name)
        if text is None:
            print("FAILED — no chunks in pgvector")
            record["error"] = "no source text"
            results.append(record)
            # Persist the failure too; previously it was only written if a
            # later episode reached the save at the bottom of the loop.
            save_results()
            continue
        record["doc_chars"] = len(text)
        print(f"{len(text)} chars")

        # Mistral metadata
        print(f" Generating Mistral metadata...", end=" ", flush=True)
        t0 = time.time()
        metadata = run_mistral_metadata(text)
        elapsed = time.time() - t0
        record["metadata"] = metadata
        record["metadata_elapsed_s"] = round(elapsed, 1)
        if "error" in metadata:
            print(f"FAILED in {elapsed:.1f}s")
        else:
            print(f"{elapsed:.1f}s — domain={metadata.get('domain_class')}, format={metadata.get('primary_format')}")

        # Submit to Graphiti
        source_desc = format_metadata_as_orientation(metadata)
        record["source_description"] = source_desc
        print(f" Submitting to Graphiti test group...", end=" ", flush=True)
        t0 = time.time()
        try:
            result = submit_episode(name, text, source_desc)
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — OK")
            record["submit_elapsed_s"] = round(elapsed, 1)
            record["submit_result"] = result
        except Exception as e:
            # Best-effort batch: record the failure and move on to the next episode.
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — FAILED: {e}")
            record["submit_error"] = str(e)

        results.append(record)
        # Save intermediate state after each episode
        save_results()
        print()

    # Final write so the message below is true even for an empty sample.
    save_results()
    print(f"\nDone. Results saved to {RESULTS_FILE}")


if __name__ == "__main__":
    main()
+181
View File
@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""E1 corrected re-run — cascade orientation passed via custom_extraction_instructions."""
import json
import os
import requests
import time
from pathlib import Path
import psycopg2
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
SAMPLE_FILE = EXPERIMENTS / "cascade_reextract_sample.json"
RESULTS_FILE = EXPERIMENTS / "cascade_reextract_results.json"
PG_DSN = os.environ["PG_DSN"]
SIDECAR_URL = "http://localhost:8001"
TEST_GROUP_ID = "aaron_cascade_test"
MAX_DOC_CHARS = 12000
METADATA_PROMPT = """You are a metadata extraction system. Given a document, produce structural and content metadata in strict JSON format.
Do not summarize the content beyond the one-sentence summary field. Do not extract entities or relationships. Do not interpret meaning. Produce only the metadata schema below.
Output JSON only. No prose, no explanation, no markdown code fences.
Schema:
{
"language": "<ISO 639-1 code>",
"char_length": <integer>,
"primary_format": "<prose|slides|code|structured|mixed>",
"structural_signals": {
"has_headings": <boolean>,
"has_bullet_lists": <boolean>,
"has_numbered_lists": <boolean>,
"has_tables": <boolean>,
"has_code_blocks": <boolean>,
"has_dates": <boolean>
},
"content_signals": {
"has_named_people": <boolean>,
"has_institutional_language": <boolean>,
"has_technical_terminology": <boolean>,
"has_first_person": <boolean>,
"has_quotations": <boolean>
},
"domain_class": "<technical|administrative|educational|personal|conversational>",
"one_sentence_summary": "<one sentence describing what the document is about>"
}
Document:
"""
def get_pg():
    """Return a fresh psycopg2 connection opened with PG_DSN."""
    pg_connection = psycopg2.connect(PG_DSN)
    return pg_connection
def fetch_source_text(source):
    """Rebuild one source's full text from its rows in `embeddings`.

    Args:
        source: value matched against the `source` column.

    Returns:
        The rows' `document` text joined by blank lines in `id` order, or
        None when no row matches.
    """
    conn = get_pg()
    try:
        cur = conn.cursor()
        try:
            cur.execute("""
                SELECT STRING_AGG(document, E'\n\n' ORDER BY id) AS full_doc
                FROM embeddings WHERE source = %s
            """, (source,))
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Close the connection on every path; it used to leak when the query raised.
        conn.close()
    # STRING_AGG over zero rows yields a single row holding NULL.
    return row[0] if row is not None else None
def run_mistral_metadata(text):
    """Ask the local Ollama `mistral:latest` model for document metadata.

    Only the first MAX_DOC_CHARS characters of *text* are sent.

    Returns:
        The parsed metadata dict with "char_length" set to the length of the
        text actually sent, or {"error": ..., "raw": ...} when the model
        output is not a JSON object.

    Raises:
        requests.HTTPError: when the Ollama endpoint returns an error status.
    """
    truncated = text[:MAX_DOC_CHARS]
    prompt = METADATA_PROMPT + truncated
    response = requests.post(
        "http://localhost:11434/api/generate",
        json={"model": "mistral:latest", "prompt": prompt, "stream": False, "format": "json"},
        timeout=180,
    )
    response.raise_for_status()
    raw = response.json()["response"]
    try:
        metadata = json.loads(raw)
    except json.JSONDecodeError:
        return {"error": "JSON parse failed", "raw": raw[:500]}
    if not isinstance(metadata, dict):
        # A JSON list/string/number would crash on the assignment below and
        # on the callers' .get() lookups; report it as a metadata failure.
        return {"error": "metadata is not a JSON object", "raw": raw[:500]}
    # The model's own char_length is replaced by the measured value.
    metadata["char_length"] = len(truncated)
    return metadata
def format_metadata_as_orientation(metadata):
    """Build orient-not-bound extraction instructions from metadata.

    Returns None when the metadata dict carries an "error" key. Otherwise
    returns a sentence stating domain, format and summary, followed by the
    framing text. Missing domain/format read "unknown" and a missing summary
    is empty.
    """
    if "error" in metadata:
        return None
    domain = metadata.get("domain_class", "unknown")
    fmt = metadata.get("primary_format", "unknown")
    summary = metadata.get("one_sentence_summary", "")
    sentences = [
        f"This is a {domain} document in {fmt} format.",
        f"Summary: {summary}",
        "This metadata is provided to orient your extraction, not to constrain it.",
        "Extract entities and relationships freely from the document text itself;",
        "the metadata is descriptive context, not a checklist.",
    ]
    return " ".join(sentences)
def submit_episode_singular(name, content, custom_instructions):
    """POST one episode to the sidecar's singular /episodes endpoint.

    The cascade orientation travels in "custom_extraction_instructions";
    "source_description" is a fixed neutral label. *content* is cut to
    MAX_DOC_CHARS. Returns the decoded JSON response; raises
    requests.HTTPError on an error status.
    """
    body = {
        "name": name,
        "content": content[:MAX_DOC_CHARS],
        "source_description": "e1_corrected_run",  # neutral label, not the cascade text
        "timestamp": "2026-04-28T00:00:00",
        "group_id": TEST_GROUP_ID,
        "custom_extraction_instructions": custom_instructions,
    }
    reply = requests.post(f"{SIDECAR_URL}/episodes", json=body, timeout=300)
    reply.raise_for_status()
    return reply.json()
def main():
    """Re-run E1 through the singular /episodes endpoint for every sample entry.

    For each entry: fetch the source text, generate Mistral metadata, turn it
    into custom extraction instructions (None when metadata failed) and
    submit the episode. Each step's outcome and timing is stored on a
    per-episode record.

    RESULTS_FILE is rewritten after every record, including episodes that
    fail at the fetch step, so a failure on the last episode is not lost.
    """
    with open(SAMPLE_FILE) as f:
        sample = json.load(f)
    selected = sample["selected"]
    print(f"E1 CORRECTED re-run — {len(selected)} episodes via /episodes (singular)")
    print(f"Cascade orientation passed in custom_extraction_instructions.\n")

    results = []

    def save_results():
        """Write everything collected so far to RESULTS_FILE."""
        with open(RESULTS_FILE, "w") as out:
            json.dump({"results": results}, out, indent=2, default=str)

    for i, ep in enumerate(selected, 1):
        name = ep["name"]
        bucket = ep["bucket"]
        print(f"[{i}/{len(selected)}] [{bucket}] {name}")
        record = {"name": name, "bucket": bucket, "tier1_entities": ep["entities"]}

        print(f" Fetching source text...", end=" ", flush=True)
        text = fetch_source_text(name)
        if text is None:
            print("FAILED — no chunks in pgvector")
            record["error"] = "no source text"
            results.append(record)
            # Persist the failure too; previously it was only written if a
            # later episode reached the save at the bottom of the loop.
            save_results()
            continue
        record["doc_chars"] = len(text)
        print(f"{len(text)} chars")

        print(f" Generating Mistral metadata...", end=" ", flush=True)
        t0 = time.time()
        metadata = run_mistral_metadata(text)
        elapsed = time.time() - t0
        record["metadata"] = metadata
        record["metadata_elapsed_s"] = round(elapsed, 1)
        if "error" in metadata:
            print(f"FAILED in {elapsed:.1f}s")
        else:
            print(f"{elapsed:.1f}s — domain={metadata.get('domain_class')}, format={metadata.get('primary_format')}")

        custom_instructions = format_metadata_as_orientation(metadata)
        record["custom_extraction_instructions"] = custom_instructions
        print(f" Submitting via /episodes (singular) with custom_extraction_instructions...", end=" ", flush=True)
        t0 = time.time()
        try:
            result = submit_episode_singular(name, text, custom_instructions)
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — OK")
            record["submit_elapsed_s"] = round(elapsed, 1)
            record["submit_result"] = result
        except Exception as e:
            # Best-effort batch: record the failure and move on to the next episode.
            elapsed = time.time() - t0
            print(f"{elapsed:.1f}s — FAILED: {e}")
            record["submit_error"] = str(e)

        results.append(record)
        save_results()
        print()

    # Final write so the message below is true even for an empty sample.
    save_results()
    print(f"\nDone. Results saved to {RESULTS_FILE}")


if __name__ == "__main__":
    main()
+116
View File
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""E1 sample selection — pick 10 episodes from Tier 1 stratified by density and type."""
import json
import os
import subprocess
from pathlib import Path
from collections import defaultdict
EXPERIMENTS = Path.home() / "aaronai" / "experiments"
OUTPUT = EXPERIMENTS / "cascade_reextract_sample.json"
# Get all Tier 1 episodes with their entity counts via FalkorDB
def query_episode_counts():
    """Return [{"name": ..., "entities": ...}] for every Episodic node in graph "aaron".

    Runs one Cypher query through redis-cli and parses its line-oriented
    output: a "name" line and the line after it are skipped as headers,
    parsing stops at the first line starting with "Cached" or "Query", and
    each remaining (name, integer) line pair becomes one entry. A line whose
    successor is not an integer is dropped.
    """
    cypher = ("MATCH (e:Episodic) OPTIONAL MATCH (e)-[r]-(n:Entity) "
              "RETURN e.name AS name, count(distinct n) AS entities "
              "ORDER BY entities DESC")
    proc = subprocess.run(
        ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", "aaron", cypher],
        capture_output=True, text=True
    )
    rows = [ln for ln in proc.stdout.split("\n") if ln.strip()]
    total = len(rows)
    episodes = []
    pos = 0
    while pos < total:
        current = rows[pos]
        if current == "name":
            pos += 2  # skip "name" and "entities" headers
            continue
        if current.startswith(("Cached", "Query")):
            break
        if pos + 1 >= total:
            pos += 1
            continue
        # Each episode: name on one line, count on the next.
        try:
            count = int(rows[pos + 1])
        except ValueError:
            pos += 1
            continue
        episodes.append({"name": current, "entities": count})
        pos += 2
    return episodes
# Pull every Tier 1 episode with its entity count before stratifying.
print("Fetching episode entity counts from FalkorDB...")
episodes = query_episode_counts()
print("Got {} episodes".format(len(episodes)))
# Classify by density bucket and type
def is_document(name):
    """Return True when *name* ends, case-insensitively, in a known document extension."""
    lowered = name.lower()
    return lowered.endswith((".pdf", ".docx", ".pptx", ".txt", ".md"))
# Compute quartile boundaries from the entity counts
if not episodes:
    # An empty list (e.g. the docker/redis-cli call produced no parsable rows)
    # would otherwise fail below with a bare IndexError.
    raise SystemExit("No episodes returned from FalkorDB — cannot compute quartile boundaries.")
counts = sorted([e["entities"] for e in episodes], reverse=True)
n = len(counts)
top_q = counts[n // 4]  # 25th percentile from top
bottom_q = counts[3 * n // 4]  # 75th percentile from top
print(f"\nQuartile boundaries: top={top_q}+, middle=({bottom_q+1}-{top_q-1}), bottom=0-{bottom_q}")

# Conversations are split by density; documents form their own bucket and
# must have at least 5 entities.
high = [e for e in episodes if e["entities"] >= top_q and not is_document(e["name"])]
mid = [e for e in episodes if bottom_q < e["entities"] < top_q and not is_document(e["name"])]
low = [e for e in episodes if e["entities"] <= bottom_q and not is_document(e["name"])]
docs = [e for e in episodes if is_document(e["name"]) and e["entities"] >= 5]
print(f"High-density conversations: {len(high)}")
print(f"Mid-density conversations: {len(mid)}")
print(f"Low-density conversations: {len(low)}")
print(f"Documents (≥5 entities): {len(docs)}")
# Deterministic selection — take from middle of each bucket to avoid edge cases
def pick(bucket, n):
    """Return *n* consecutive items centred on the middle of *bucket*.

    When the bucket holds fewer than *n* items it is returned unchanged.
    """
    size = len(bucket)
    if size < n:
        return bucket
    first = max(0, size // 2 - n // 2)
    return bucket[first:first + n]
# 3 high + 3 mid + 2 low conversations, then 2 documents.
selected = [
    *pick(high, 3),
    *pick(mid, 3),
    *pick(low, 2),
    *pick(docs, 2),
]
# Tag each with its bucket
def bucket_for(ep):
    """Return the bucket label for an episode record.

    Document names map to "document"; otherwise the entity count is compared
    with the module-level top_q / bottom_q boundaries to give "high", "mid"
    or "low".
    """
    if is_document(ep["name"]):
        return "document"
    entity_count = ep["entities"]
    if entity_count >= top_q:
        return "high"
    return "mid" if entity_count > bottom_q else "low"
# Tag each selected episode in place, then list the selection.
for chosen in selected:
    chosen["bucket"] = bucket_for(chosen)

print(f"\nSelected {len(selected)} episodes for E1:")
for chosen in selected:
    print(f" [{chosen['bucket']:>8}] {chosen['entities']:>3}e {chosen['name']}")

# Save selection
selection_doc = {
    "metadata": {
        "purpose": "E1 cascade re-extraction sample (n=10)",
        "stratification": "density buckets + document subset",
        "quartile_top": top_q,
        "quartile_bottom": bottom_q,
        "total_tier1_episodes": len(episodes),
    },
    "selected": selected,
}
with open(OUTPUT, "w") as out_handle:
    json.dump(selection_doc, out_handle, indent=2)
print(f"\nSaved to {OUTPUT}")
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""E2 follow-up: confirm Aaron AI alias situation, find other potential duplicates."""
import subprocess
QUERIES = [
("Aaron AI variants",
"MATCH (n:Entity) WHERE n.name CONTAINS 'Aaron AI' OR n.name CONTAINS 'ARIN' OR n.name CONTAINS 'RNAI' RETURN n.name, n.summary"),
("All Mossygear-named entities",
"MATCH (n:Entity) WHERE n.name CONTAINS 'Mossy' OR n.name CONTAINS 'A+K' OR n.name CONTAINS 'AK Design' RETURN n.name, n.summary"),
("Total entity count check",
"MATCH (n:Entity) RETURN count(n) as total"),
("Top 30 entity names by edge count",
"MATCH (n:Entity)-[r]-() RETURN n.name, count(r) as edges ORDER BY edges DESC LIMIT 30"),
]
# Run each diagnostic query against the "aaron" graph and dump the raw output.
banner = '=' * 60
for label, query in QUERIES:
    print(f"\n{banner}")
    print(f"QUERY: {label}")
    print(banner)
    command = ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", "aaron", query]
    result = subprocess.run(command, capture_output=True, text=True)
    print(result.stdout)
+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/env python3
"""E2: Entity resolution diagnostic. Queries Graphiti's FalkorDB for the six test entities."""
import subprocess
import sys
TEST_ENTITIES = ["Aaron", "Kat", "HVAMC", "Bird", "Susan Hamlet", "Tulsa album"]
def run_cypher(query):
    """Run *query* on the "aaron" graph via redis-cli in the `falkordb` container; return stdout."""
    command = ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", "aaron", query]
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout
# For each test entity, list every Entity node whose name contains it.
banner = '=' * 60
for name in TEST_ENTITIES:
    print(f"\n{banner}")
    print(f"ENTITY: {name}")
    print(banner)
    query = f"MATCH (n:Entity) WHERE n.name CONTAINS '{name}' RETURN n.name, n.summary"
    print(run_cypher(query))
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env python3
"""E2 follow-up: how many distinct episodes connect to each entity?"""
import subprocess
# Entities whose connected episodes are listed, in report order.
_ENTITY_NAMES = (
    "Aaron",
    "Nelson",
    "HVAMC",
    "Bird",
    "Tulsa album",
    "Susan Hamlet",
    "Kat",
    "Katherine Wilson",
)

# (label, query) pairs: every entity uses the same exact-name match template.
QUERIES = [
    (
        entity,
        f"MATCH (n:Entity {{name: '{entity}'}})-[]-(e:Episodic) RETURN DISTINCT e.name LIMIT 30",
    )
    for entity in _ENTITY_NAMES
]

_RULE = "=" * 60
_GRAPH_QUERY_CMD = ["docker", "exec", "falkordb", "redis-cli", "GRAPH.QUERY", "aaron"]

for label, query in QUERIES:
    print("\n" + _RULE)
    print("ENTITY: " + label)
    print(_RULE)
    # Only stdout is shown; the exit status and stderr are not inspected.
    completed = subprocess.run(_GRAPH_QUERY_CMD + [query], capture_output=True, text=True)
    print(completed.stdout)
+190
View File
@@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""
E1.8 Phase 2 — Evaluate
Pulls predicate counts from FalkorDB for each group_id and compares.
Run after e1_8_taxfree_cascade.py completes.
"""
import json, subprocess
from pathlib import Path
# RESULTS_PATH is read by run(); EVAL_PATH is where run() writes its records.
RESULTS_PATH = Path.home() / "aaronai" / "experiments" / "e1_8_results.json"
EVAL_PATH = Path.home() / "aaronai" / "experiments" / "e1_8_eval.json"
# Graph keys passed by query() as the GRAPH.QUERY graph argument, one per
# experimental condition compared in run().
GROUP_TAXFREE = "aaron_e18_taxfree"
GROUP_BASELINE = "aaron_e18_baseline"
GROUP_STANDARD = "aaron_e18_standard"
GROUP_PROD = "aaron"
GROUP_E14 = "aaron_cascade_e14"
def query(group_id, cypher):
    """Execute *cypher* on graph *group_id* and return redis-cli's stdout text.

    The command runs as ``redis-cli GRAPH.QUERY`` inside the ``falkordb``
    container via ``docker exec``. The exit status and stderr are not
    inspected.
    """
    argv = ["docker", "exec", "falkordb", "redis-cli"]
    argv += ["GRAPH.QUERY", group_id, cypher]
    return subprocess.run(argv, capture_output=True, text=True).stdout
def get_episode_uuid(group_id, episode_name):
    """Return the UUID of the Episodic node named *episode_name*, or None.

    Args:
        group_id: Graph key to query.
        episode_name: Exact value of the episode's ``name`` property.

    Returns:
        The first output line that looks like a UUID (36 characters with
        four hyphens), or None when no such line is present.
    """
    # Escape backslashes first, then single quotes, so the name cannot
    # terminate the single-quoted Cypher literal. The previous
    # replace("'", "\'") was a no-op because "\'" is just "'" in Python.
    safe = episode_name.replace("\\", "\\\\").replace("'", "\\'")
    cypher = f"MATCH (e:Episodic) WHERE e.name = '{safe}' RETURN e.uuid LIMIT 1"
    output = query(group_id, cypher)
    for line in output.split("\n"):
        line = line.strip()
        # Shape check only: 36 characters containing exactly four hyphens.
        if len(line) == 36 and line.count("-") == 4:
            return line
    return None
def count_preds(group_id, uuid):
    """Count distinct RELATES_TO edge names whose ``episodes`` list contains *uuid*.

    Returns the first all-digit line of the query output as an int, or 0
    when the output contains no such line.
    """
    cypher = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(distinct r.name) AS p"
    stripped_lines = (raw.strip() for raw in query(group_id, cypher).split("\n"))
    return next((int(text) for text in stripped_lines if text.isdigit()), 0)
def count_edges(group_id, uuid):
    """Count RELATES_TO edges whose ``episodes`` list contains *uuid*.

    Returns the first all-digit line of the query output as an int, or 0
    when the output contains no such line.
    """
    cypher = f"MATCH ()-[r:RELATES_TO]->() WHERE '{uuid}' IN r.episodes RETURN count(r) AS n"
    stripped_lines = (raw.strip() for raw in query(group_id, cypher).split("\n"))
    return next((int(text) for text in stripped_lines if text.isdigit()), 0)
def eval_source(name, groups):
    """Collect predicate and edge counts for episode *name* in each graph.

    Args:
        name: Episode name to look up.
        groups: Mapping of short label -> graph key.

    Returns:
        A dict with ``name`` plus ``<label>_preds`` and ``<label>_edges`` for
        every label; both are None when the episode is not found in that graph.
    """
    summary = {"name": name}
    for label, group_id in groups.items():
        episode_uuid = get_episode_uuid(group_id, name)
        if not episode_uuid:
            preds = edges = None
        else:
            preds = count_preds(group_id, episode_uuid)
            edges = count_edges(group_id, episode_uuid)
        summary[f"{label}_preds"] = preds
        summary[f"{label}_edges"] = edges
    return summary
def _pct_delta(value, reference):
    """Return the percent change of *value* relative to *reference* (0 if *reference* is not positive)."""
    return ((value - reference) / reference * 100) if reference > 0 else 0


def run():
    """Compare distinct-predicate counts across conditions and print the decision.

    Reads the ingest records from ``RESULTS_PATH``, looks up per-episode
    counts in each graph, prints per-source and aggregate tables for both
    sub-samples, evaluates the decision rule on Sub-sample A, and writes
    the per-source records to ``EVAL_PATH``.
    """
    print("E1.8 — Evaluation phase")
    print("=" * 60)
    results = json.loads(RESULTS_PATH.read_text())
    eval_results = {"subsample_a": [], "subsample_b": []}

    # Sub-sample A — compare taxfree vs prod (baseline) vs e14 cascade
    print("\nSub-sample A")
    print(f"{'Source':<55} {'prod':>5} {'e14c':>5} {'tf':>5} {'e14Δ':>6} {'tfΔ':>6}")
    print("-" * 90)
    a_records = []
    for item in results["subsample_a"]:
        name = item["name"]
        r = eval_source(name, {
            "prod": GROUP_PROD,
            "e14": GROUP_E14,
            "tf": GROUP_TAXFREE,
        })
        r["bucket"] = item["bucket"]
        r["taxfree_metadata"] = item.get("taxfree_metadata")
        r["e14_delta_preds"] = item.get("e14_delta_preds")
        # Missing counts (None) are treated as 0 for display and deltas.
        prod = r.get("prod_preds") or 0
        e14 = r.get("e14_preds") or 0
        tf = r.get("tf_preds") or 0
        e14_delta = _pct_delta(e14, prod)
        tf_delta = _pct_delta(tf, prod)
        display = name[:53] + ".." if len(name) > 55 else name
        print(f"{display:<55} {prod:>5} {e14:>5} {tf:>5} {e14_delta:>+5.0f}% {tf_delta:>+5.0f}%")
        r["tf_delta_vs_prod"] = tf_delta
        r["e14_delta_vs_prod"] = e14_delta
        a_records.append(r)
        eval_results["subsample_a"].append(r)

    # Aggregate Sub-sample A over sources with non-zero prod and taxfree counts.
    valid = [r for r in a_records if r.get("prod_preds") and r.get("tf_preds")]
    a_mean_e14_delta = a_mean_tf_delta = None
    if valid:
        a_mean_e14_delta = sum(r["e14_delta_vs_prod"] for r in valid) / len(valid)
        a_mean_tf_delta = sum(r["tf_delta_vs_prod"] for r in valid) / len(valid)
        print(f"\nAggregate Sub-sample A (n={len(valid)}):")
        print(f" E1.4 cascade mean delta vs prod: {a_mean_e14_delta:+.1f}%")
        print(f" Taxonomy-free mean delta vs prod: {a_mean_tf_delta:+.1f}%")
        print(f" Taxonomy-free vs E1.4 cascade: {a_mean_tf_delta - a_mean_e14_delta:+.1f}pp")

    # Sub-sample B — all three conditions
    print("\n\nSub-sample B")
    print(f"{'Source':<55} {'base':>5} {'std':>5} {'tf':>5} {'stdΔ':>6} {'tfΔ':>6}")
    print("-" * 90)
    b_records = []
    for item in results["subsample_b"]:
        name = item["name"]
        r = eval_source(name, {
            "base": GROUP_BASELINE,
            "std": GROUP_STANDARD,
            "tf": GROUP_TAXFREE,
        })
        r["bucket"] = item["bucket"]
        r["taxfree_metadata"] = item.get("taxfree_metadata")
        r["standard_metadata"] = item.get("standard_metadata")
        base = r.get("base_preds") or 0
        std = r.get("std_preds") or 0
        tf = r.get("tf_preds") or 0
        std_delta = _pct_delta(std, base)
        tf_delta = _pct_delta(tf, base)
        display = name[:53] + ".." if len(name) > 55 else name
        print(f"{display:<55} {base:>5} {std:>5} {tf:>5} {std_delta:>+5.0f}% {tf_delta:>+5.0f}%")
        r["std_delta_vs_base"] = std_delta
        r["tf_delta_vs_base"] = tf_delta
        b_records.append(r)
        eval_results["subsample_b"].append(r)

    # Aggregate Sub-sample B. These means use their own names: the decision
    # rule below must see the Sub-sample A aggregates, which the previous
    # shared `mean_tf_delta` variable silently overwrote here.
    valid_b = [r for r in b_records if r.get("base_preds") and r.get("tf_preds")]
    if valid_b:
        b_mean_std_delta = sum(r["std_delta_vs_base"] for r in valid_b) / len(valid_b)
        b_mean_tf_delta = sum(r["tf_delta_vs_base"] for r in valid_b) / len(valid_b)
        print(f"\nAggregate Sub-sample B (n={len(valid_b)}):")
        print(f" Standard cascade mean delta vs baseline: {b_mean_std_delta:+.1f}%")
        print(f" Taxonomy-free mean delta vs baseline: {b_mean_tf_delta:+.1f}%")
        # By bucket
        print("\nPer-bucket (Sub-sample B):")
        for bucket in ["high", "mid", "document"]:
            br = [r for r in valid_b if r["bucket"] == bucket]
            if not br:
                continue
            m_std = sum(r["std_delta_vs_base"] for r in br) / len(br)
            m_tf = sum(r["tf_delta_vs_base"] for r in br) / len(br)
            print(f" [{bucket:>8}] n={len(br)} std={m_std:+.0f}% tf={m_tf:+.0f}%")

    # Decision rule evaluation: taxonomy-free vs E1.4 cascade, both measured
    # against prod on Sub-sample A.
    print("\n" + "=" * 60)
    print("DECISION RULE:")
    if valid:
        improvement = a_mean_tf_delta - a_mean_e14_delta
        if improvement >= 20:
            print(f" ✓ STRONG RECOVERY (+{improvement:.1f}pp) — Stage 3.1 ships as taxonomy-free")
        elif improvement >= 5:
            print(f" ~ PARTIAL RECOVERY (+{improvement:.1f}pp) — orientation helps, needs refinement")
        elif improvement >= 0:
            print(f" ~ MARGINAL (+{improvement:.1f}pp) — consider API extractor prompt redesign (E1.9)")
        else:
            print(f" ✗ NEGATIVE ({improvement:.1f}pp) — taxonomy-free introduces more noise than standard")

    EVAL_PATH.write_text(json.dumps(eval_results, indent=2))
    print(f"\nEval saved to {EVAL_PATH}")
if __name__ == "__main__":
run()
+285
View File
@@ -0,0 +1,285 @@
#!/usr/bin/env python3
"""
E1.8 Phase 1 — Ingest
Runs taxonomy-free and standard cascade ingestion for Sub-samples A and B.
Run this first, then run e1_8_eval.py to pull predicate counts.
"""
import os, json, time, psycopg2, requests
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env", override=True)
PG_DSN = os.getenv("PG_DSN")
GRAPHITI_URL = "http://localhost:8001"
RESULTS_PATH = Path.home() / "aaronai" / "experiments" / "e1_8_results.json"
GROUP_TAXFREE = "aaron_e18_taxfree"
GROUP_BASELINE = "aaron_e18_baseline"
GROUP_STANDARD = "aaron_e18_standard"
TAXFREE_PROMPT = """You are a metadata extraction system. Given a document, describe its content shape for use as orientation context in a knowledge graph extraction pass.
Do not summarize content. Do not extract entities. Do not assign a single category label.
Instead, describe:
- What domains or frames are active in this content (there may be several simultaneously)
- How those frames relate to each other in this specific document
- What kind of relational content a knowledge graph extractor should look for
Output JSON only. No prose, no explanation, no markdown.
Schema:
{
"active_frames": ["<frame 1>", "<frame 2>", ...],
"frame_relationships": "<one sentence describing how the frames interact in this document>",
"extraction_orientation": "<one sentence orienting the extractor toward the most relationship-rich content>",
"one_sentence_summary": "<one sentence describing what the document is about>"
}
Document:
"""
STANDARD_PROMPT = """You are a metadata extraction system. Given a document, produce structural and content metadata in strict JSON format.
Do not summarize the content beyond the one-sentence summary field. Do not extract entities or relationships. Do not interpret meaning. Produce only the metadata schema below.
Output JSON only. No prose, no explanation, no markdown code fences.
Schema:
{
"language": "<ISO 639-1 code>",
"char_length": <integer>,
"primary_format": "<prose|slides|code|structured|mixed>",
"structural_signals": {
"has_headings": <boolean>,
"has_bullet_lists": <boolean>,
"has_numbered_lists": <boolean>,
"has_tables": <boolean>,
"has_code_blocks": <boolean>,
"has_dates": <boolean>
},
"content_signals": {
"has_named_people": <boolean>,
"has_institutional_language": <boolean>,
"has_technical_terminology": <boolean>,
"has_first_person": <boolean>,
"has_quotations": <boolean>
},
"domain_class": "<technical|administrative|educational|personal|conversational>",
"one_sentence_summary": "<one sentence describing what the document is about>"
}
Document:
"""
SUBSAMPLE_A = [
{"name": "Claude: Lubbock on everything album lyrics", "bucket": "high"},
{"name": "ChatGPT: Tulsa Concept Album Guide", "bucket": "high"},
{"name": "ChatGPT: Rhino 3D object flow", "bucket": "high"},
{"name": "Claude: SUNY faculty conflict of interest policies", "bucket": "mid"},
{"name": "Claude: Interview presentation research and preparation", "bucket": "mid"},
{"name": "Claude: Research Statement Restructure", "bucket": "mid"},
{"name": "ChatGPT: Respect Individual Interests for Christmas", "bucket": "low"},
{"name": "University of North Texas Cover letter.pdf", "bucket": "document"},
{"name": "Claude: Finding ideal rural housing near University of Utah", "bucket": "high"},
{"name": "ChatGPT: SEC coaches with OSU ties", "bucket": "high"},
{"name": "Claude: Bonding ASA 3D printed parts", "bucket": "mid"},
{"name": "ChatGPT: Title: User request summary.", "bucket": "low"},
{"name": "ChatGPT: Scholarship Recommendation Letter Tips", "bucket": "low"},
]
SUBSAMPLE_B = [
{"name": "ChatGPT: Job application comparison", "bucket": "high"},
{"name": "ChatGPT: External review for tenure", "bucket": "high"},
{"name": "Claude: University of Utah interview teaching example", "bucket": "high"},
{"name": "ChatGPT: Starting Dropship Gun Business", "bucket": "high"},
{"name": "ChatGPT: Analyze business plan", "bucket": "high"},
{"name": "ChatGPT: Outdoor Layering Explained", "bucket": "mid"},
{"name": "ChatGPT: Limits in Calculus.", "bucket": "mid"},
{"name": "ChatGPT: Academic Program Director Role", "bucket": "mid"},
{"name": "ChatGPT: Lonely Island Poop Skit", "bucket": "mid"},
{"name": "ChatGPT: Parse Tidal playlist", "bucket": "mid"},
{"name": "NO thesis proposal.pdf", "bucket": "document"},
{"name": "PWM.pdf", "bucket": "document"},
{"name": "Will_It_Print.pdf", "bucket": "document"},
{"name": "Kim Kedem Ind Study F2025 Syllabus.docx", "bucket": "document"},
{"name": "Aaron Nelson Graduate Transcript.pdf", "bucket": "document"},
]
def get_pg():
    """Open and return a new psycopg2 connection using ``PG_DSN``."""
    connection = psycopg2.connect(PG_DSN)
    return connection
def get_document_text(source_name):
    """Return up to 12000 characters of text for *source_name*.

    Joins the first 20 ``embeddings`` chunks for the source (ordered by id)
    with single spaces. Returns an empty string when no chunks exist.
    """
    pg = get_pg()
    try:
        cur = pg.cursor()
        cur.execute("SELECT document FROM embeddings WHERE source = %s ORDER BY id LIMIT 20", (source_name,))
        rows = cur.fetchall()
    finally:
        # Close the connection even when the query raises.
        pg.close()
    # Skip NULL chunks, which would otherwise make str.join raise TypeError.
    return " ".join(r[0] for r in rows if r[0] is not None)[:12000]
def run_mistral(prompt_prefix, doc_text, label=""):
    """Send ``prompt_prefix + doc_text`` to the local model and return its JSON object.

    Args:
        prompt_prefix: Instruction text placed before the document.
        doc_text: Document text appended to the prompt.
        label: Short tag used only in the progress messages.

    Returns:
        The parsed JSON object as a dict. When the response is not valid JSON,
        or is valid JSON that is not an object, returns
        ``{"error": "parse_failed", "raw": <first 200 chars>}``.

    Raises:
        requests.HTTPError: If the generate endpoint returns an error status.
    """
    print(f" → Mistral {label} running...", flush=True)
    payload = {"model": "mistral:latest", "prompt": prompt_prefix + doc_text, "stream": False, "format": "json"}
    resp = requests.post("http://localhost:11434/api/generate", json=payload, timeout=300)
    resp.raise_for_status()
    raw = resp.json().get("response", "{}")
    print(f" → Mistral {label} done ({len(raw)} chars)", flush=True)
    try:
        parsed = json.loads(raw)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        return {"error": "parse_failed", "raw": raw[:200]}
    # Callers use .get() on the result, so a JSON list or scalar is reported
    # as a parse failure instead of being returned.
    if not isinstance(parsed, dict):
        return {"error": "parse_failed", "raw": raw[:200]}
    return parsed
def build_taxfree_orientation(meta):
    """Format taxonomy-free metadata as a one-line orientation string.

    Args:
        meta: Dict with optional keys ``active_frames``,
            ``frame_relationships``, ``extraction_orientation`` and
            ``one_sentence_summary``. Missing or null values render as empty.

    Returns:
        A string of the form ``"Active frames: ... Frame relationships: ...
        Extraction focus: ... Summary: ..."``.
    """
    # The metadata comes from a model, so tolerate a null value, a bare
    # string, or non-string items instead of letting str.join raise.
    raw_frames = meta.get("active_frames") or []
    if not isinstance(raw_frames, (list, tuple)):
        raw_frames = [raw_frames]
    frames = ", ".join(str(frame) for frame in raw_frames)
    rel = meta.get("frame_relationships") or ""
    orient = meta.get("extraction_orientation") or ""
    summary = meta.get("one_sentence_summary") or ""
    return f"Active frames: {frames}. Frame relationships: {rel} Extraction focus: {orient} Summary: {summary}"
def build_standard_orientation(meta):
    """Format standard-cascade metadata as a multi-line orientation string.

    Args:
        meta: Dict with optional keys ``domain_class``, ``primary_format``,
            ``one_sentence_summary`` and ``content_signals`` (a dict of
            boolean flags).

    Returns:
        Five ``key: value`` lines joined by newlines. Missing class and format
        render as ``unknown``; missing flags render as ``False``.
    """
    dc = meta.get("domain_class", "unknown")
    pf = meta.get("primary_format", "unknown")
    summary = meta.get("one_sentence_summary", "")
    # The metadata comes from a model: a null or non-dict content_signals
    # value would otherwise crash on .get() below.
    cs = meta.get("content_signals")
    if not isinstance(cs, dict):
        cs = {}
    return (f"domain_class: {dc}\nprimary_format: {pf}\none_sentence_summary: {summary}\n"
            f"has_named_people: {cs.get('has_named_people', False)}\n"
            f"has_technical_terminology: {cs.get('has_technical_terminology', False)}")
def ingest(source_name, doc_text, orientation, group_id):
    """POST one episode to the Graphiti bulk endpoint for graph *group_id*.

    The episode content is *doc_text* truncated to 12000 characters and
    *orientation* is sent as its ``source_description``. Raises
    ``requests.HTTPError`` when the service returns an error status.
    """
    episode = {
        "name": source_name,
        "content": doc_text[:12000],
        "source_description": orientation,
        "timestamp": "2026-04-28T00:00:00",
    }
    body = {"episodes": [episode], "group_id": group_id}
    response = requests.post(f"{GRAPHITI_URL}/episodes/bulk", json=body, timeout=300)
    response.raise_for_status()
def save(results):
    """Overwrite ``RESULTS_PATH`` with *results* serialized as indented JSON."""
    serialized = json.dumps(results, indent=2)
    RESULTS_PATH.write_text(serialized)
def run():
    """Ingest Sub-samples A and B into their experiment graphs.

    Sub-sample A is ingested with taxonomy-free orientation only; Sub-sample B
    is ingested three times (no orientation, standard, taxonomy-free). The
    results file is rewritten after every recorded source, and sources already
    present in it are skipped on a later run.
    """
    print("E1.8 — Ingest phase")
    print("=" * 60)
    # Load existing results if resuming
    if RESULTS_PATH.exists():
        results = json.loads(RESULTS_PATH.read_text())
        done_a = {r["name"] for r in results.get("subsample_a", [])}
        done_b = {r["name"] for r in results.get("subsample_b", [])}
        print(f"Resuming: {len(done_a)} A done, {len(done_b)} B done")
    else:
        results = {"subsample_a": [], "subsample_b": []}
        done_a, done_b = set(), set()
    # Per-source E1.4 numbers, copied into each Sub-sample A record below.
    e14_data = json.loads((Path.home() / "aaronai" / "experiments" / "e14_per_source_comparison.json").read_text())
    e14_by_name = {s["name"]: s for s in e14_data}
    # Sub-sample A — taxonomy-free only (baseline + standard from E1.4)
    print("\nSub-sample A — taxonomy-free ingestion only")
    for item in SUBSAMPLE_A:
        name = item["name"]
        if name in done_a:
            print(f" SKIP (done): {name}")
            continue
        print(f"\n {name}")
        doc_text = get_document_text(name)
        if not doc_text:
            print(f" SKIP — no text")
            continue
        tf_meta = run_mistral(TAXFREE_PROMPT, doc_text, "taxfree")
        print(f" frames: {tf_meta.get('active_frames', 'ERROR')}")
        orientation = build_taxfree_orientation(tf_meta)
        try:
            ingest(name, doc_text, orientation, GROUP_TAXFREE)
            time.sleep(3)
            print(f" ingested to {GROUP_TAXFREE}")
        except Exception as e:
            # A failed source is not recorded, so a later run retries it.
            print(f" ingest failed: {e}")
            continue
        e14 = e14_by_name.get(name, {})
        results["subsample_a"].append({
            "name": name,
            "bucket": item["bucket"],
            "taxfree_metadata": tf_meta,
            "taxfree_orientation": orientation,
            "e14_prod_preds": e14.get("prod_preds"),
            "e14_cascade_preds": e14.get("cascade_preds"),
            "e14_delta_preds": e14.get("delta_preds"),
            "e14_prod_edges": e14.get("prod_edges"),
            "e14_cascade_edges": e14.get("cascade_edges"),
            "e14_delta_edges": e14.get("delta_edges"),
        })
        # Checkpoint after every source so an interrupted run can resume.
        save(results)
    # Sub-sample B — all three conditions
    print("\nSub-sample B — all three conditions")
    for item in SUBSAMPLE_B:
        name = item["name"]
        if name in done_b:
            print(f" SKIP (done): {name}")
            continue
        print(f"\n {name} ({item['bucket']})")
        doc_text = get_document_text(name)
        if not doc_text:
            print(f" SKIP — no text")
            continue
        entry = {"name": name, "bucket": item["bucket"],
                 "taxfree_metadata": None, "standard_metadata": None}
        # NOTE(review): unlike Sub-sample A, a failed ingest below is only
        # printed; the entry is still appended and saved, so a later run
        # skips this source even if one of its conditions failed.
        # Baseline
        try:
            ingest(name, doc_text, "", GROUP_BASELINE)
            time.sleep(3)
            print(f" baseline ingested")
        except Exception as e:
            print(f" baseline failed: {e}")
        # Standard
        std_meta = run_mistral(STANDARD_PROMPT, doc_text, "standard")
        entry["standard_metadata"] = std_meta
        try:
            ingest(name, doc_text, build_standard_orientation(std_meta), GROUP_STANDARD)
            time.sleep(3)
            print(f" standard ingested, domain_class={std_meta.get('domain_class','?')}")
        except Exception as e:
            print(f" standard failed: {e}")
        # Taxonomy-free
        tf_meta = run_mistral(TAXFREE_PROMPT, doc_text, "taxfree")
        entry["taxfree_metadata"] = tf_meta
        print(f" frames: {tf_meta.get('active_frames', 'ERROR')}")
        try:
            ingest(name, doc_text, build_taxfree_orientation(tf_meta), GROUP_TAXFREE)
            time.sleep(3)
            print(f" taxfree ingested")
        except Exception as e:
            print(f" taxfree failed: {e}")
        results["subsample_b"].append(entry)
        save(results)
    print("\n" + "=" * 60)
    print(f"Ingest complete. Results at {RESULTS_PATH}")
    print("Now run: python3 ~/aaronai/scripts/experiments/e1_8_eval.py")
if __name__ == "__main__":
run()
+204
View File
@@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
E1.9 Phase 1 — Retroactive validation
For each E1.8 source, query the production graph with frame_relationships
to get a coverage score, then check whether the routing tier prediction
matches the actual best-performing condition from E1.8.
No API spend required — uses existing E1.8 data and Graphiti search only.
"""
import json, requests
from pathlib import Path
# Base URL of the Graphiti service used by get_coverage_score().
GRAPHITI_URL = "http://localhost:8001"
# Inputs read by run(): E1.8 evaluation records and E1.8 ingest records.
E18_PATH = Path.home() / "aaronai" / "experiments" / "e1_8_eval.json"
E18_INGEST_PATH = Path.home() / "aaronai" / "experiments" / "e1_8_results.json"
# Output written by run().
RESULTS_PATH = Path.home() / "aaronai" / "experiments" / "e1_9_retroactive.json"
# Routing thresholds applied by assign_tier(): coverage >= HIGH_THRESHOLD
# routes to "baseline", coverage >= LOW_THRESHOLD routes to "standard", and
# anything lower routes to "taxfree".
HIGH_THRESHOLD = 0.70 # baseline
LOW_THRESHOLD = 0.40 # taxonomy-free
def get_coverage_score(query, group_id="aaron"):
    """Score how well graph *group_id* already covers *query*, from 0.0 to 1.0.

    Requests at most three search results and returns ``hits / 3`` capped at
    1.0, so 0, 1, 2 and 3 hits map to 0.0, 0.33, 0.67 and 1.0. The hit count
    is used because Graphiti fulltext search returns score=0 for all hits.
    A missing or blank query, or any error during the search, yields 0.0.
    """
    if not (query and query.strip()):
        return 0.0
    search_params = {"query": query, "limit": 3, "group_id": group_id}
    try:
        response = requests.get(f"{GRAPHITI_URL}/search", params=search_params, timeout=30)
        response.raise_for_status()
        hits = response.json().get("results", [])
        return min(len(hits) / 3.0, 1.0)
    except Exception as e:
        print(f" Search error: {e}")
        return 0.0
def assign_tier(coverage_score):
    """Map a coverage score to a routing tier name.

    Returns ``"baseline"`` at or above ``HIGH_THRESHOLD``, ``"standard"`` at
    or above ``LOW_THRESHOLD``, and ``"taxfree"`` otherwise.
    """
    # Checked from the highest floor down; the first floor met wins.
    for floor, tier in ((HIGH_THRESHOLD, "baseline"), (LOW_THRESHOLD, "standard")):
        if coverage_score >= floor:
            return tier
    return "taxfree"
def best_condition_from_e18(record, subsample):
    """Name the condition with the highest predicate count for one E1.8 record.

    For sub-sample ``"a"`` the baseline and standard counts come from
    ``prod_preds`` and ``e14_preds``; for any other value they come from
    ``base_preds`` and ``std_preds``. The taxonomy-free count is always
    ``tf_preds``. Missing or null counts are treated as 0.

    Returns ``"unknown"`` when every count is 0. Ties are resolved in the
    order taxfree, standard, baseline.
    """
    if subsample == "a":
        baseline_key, standard_key = "prod_preds", "e14_preds"
    else:
        baseline_key, standard_key = "base_preds", "std_preds"

    baseline_n = record.get(baseline_key) or 0
    standard_n = record.get(standard_key) or 0
    taxfree_n = record.get("tf_preds") or 0

    top = max(baseline_n, standard_n, taxfree_n)
    if top == 0:
        return "unknown"
    if taxfree_n == top:
        return "taxfree"
    if standard_n == top:
        return "standard"
    return "baseline"
def _score_subsample(records, subsample, fr_lookup):
    """Print one table row per record and return ``(rows, correct, total)``.

    *total* counts records whose actual best condition is known; *correct*
    counts those where the predicted tier equals it.
    """
    print(f"\nSub-sample {subsample.upper()}")
    print(f"{'Source':<50} {'cov':>5} {'tier':<10} {'predicted':<10} {'actual':<10} {'match'}")
    print("-" * 95)
    rows = []
    correct = 0
    total = 0
    for record in records:
        name = record["name"]
        fr = fr_lookup.get(name, "")
        coverage = get_coverage_score(fr)
        tier = assign_tier(coverage)
        actual_best = best_condition_from_e18(record, subsample)
        is_match = tier == actual_best
        # Both branches used to be the empty string, leaving the column blank.
        match = "✓" if is_match else "✗"
        if actual_best != "unknown":
            total += 1
            if is_match:
                correct += 1
        display = name[:48] + ".." if len(name) > 50 else name
        print(f"{display:<50} {coverage:>5.2f} {tier:<10} {tier:<10} {actual_best:<10} {match}")
        rows.append({
            "name": name, "subsample": subsample, "bucket": record.get("bucket"),
            "frame_relationships": fr, "coverage_score": coverage,
            "predicted_tier": tier, "actual_best": actual_best, "match": is_match,
        })
    return rows, correct, total


def run():
    """Check whether coverage-based routing predicts the best E1.8 condition.

    For every E1.8 source, searches the production graph with the source's
    ``frame_relationships`` text, maps the coverage score to a tier, and
    compares that tier with the condition that had the most predicates in
    E1.8. Prints per-source tables, a validation rate, mismatches and the
    score distribution, then writes everything to ``RESULTS_PATH``.
    """
    print("E1.9 Phase 1 — Retroactive validation")
    print("=" * 60)
    e18_eval = json.loads(E18_PATH.read_text())
    e18_ingest = json.loads(E18_INGEST_PATH.read_text())

    # Build frame_relationships lookup from ingest results
    fr_lookup = {}
    for key in ("subsample_a", "subsample_b"):
        for item in e18_ingest.get(key, []):
            meta = item.get("taxfree_metadata", {})
            if meta:
                fr_lookup[item["name"]] = meta.get("frame_relationships", "")

    results = []
    correct = 0
    total = 0
    for subsample in ("a", "b"):
        rows, n_correct, n_scored = _score_subsample(
            e18_eval[f"subsample_{subsample}"], subsample, fr_lookup
        )
        results.extend(rows)
        correct += n_correct
        total += n_scored

    # Summary
    rate = correct / total * 100 if total > 0 else 0
    print(f"\n{'=' * 60}")
    print(f"Validation rate: {correct}/{total} ({rate:.1f}%)")
    print()
    if rate >= 70:
        print("✓ SIGNAL VALIDATED — coverage score predicts best condition")
        print(" Proceed to Phase 2 (new ingestion with routing)")
    elif rate >= 50:
        print("~ MARGINAL — adjust thresholds before Phase 2")
        print(" Review mismatch patterns below")
    else:
        print("✗ SIGNAL NOT PREDICTIVE — frame_relationships coverage")
        print(" may not be the right signal. Consider active_frames fallback.")

    # Mismatch analysis
    mismatches = [r for r in results if not r["match"] and r["actual_best"] != "unknown"]
    if mismatches:
        print(f"\nMismatches ({len(mismatches)}):")
        for r in mismatches:
            print(f" [{r['bucket']:<8}] cov={r['coverage_score']:.2f} predicted={r['predicted_tier']} actual={r['actual_best']} | {r['name'][:50]}")

    # Coverage score distribution. Skipped when there are no sources, since
    # the mean, min and max are undefined for an empty list.
    scores = [r["coverage_score"] for r in results]
    if scores:
        print(f"\nCoverage score distribution:")
        print(f" Mean: {sum(scores)/len(scores):.2f}")
        print(f" Min: {min(scores):.2f}")
        print(f" Max: {max(scores):.2f}")
        high = sum(1 for s in scores if s >= HIGH_THRESHOLD)
        mid = sum(1 for s in scores if LOW_THRESHOLD <= s < HIGH_THRESHOLD)
        low = sum(1 for s in scores if s < LOW_THRESHOLD)
        print(f" Tier distribution: baseline={high} standard={mid} taxfree={low}")

    # Save
    output = {
        "validation_rate": rate,
        "correct": correct,
        "total": total,
        "thresholds": {"high": HIGH_THRESHOLD, "low": LOW_THRESHOLD},
        "results": results,
    }
    RESULTS_PATH.write_text(json.dumps(output, indent=2))
    print(f"\nSaved to {RESULTS_PATH}")
if __name__ == "__main__":
run()
+257
View File
@@ -0,0 +1,257 @@
#!/usr/bin/env python3
"""
Experiment 005 — Actual API Token Measurement
Measures input token reduction from prepending v2 briefing vs raw document
on Claude Haiku, validating the 42.0% modeled estimate from Experiment 002b.
Outputs: ~/aaronai/experiments/token_measurement_results.json
"""
import json
import os
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
import anthropic
import psycopg2
from dotenv import load_dotenv
load_dotenv(Path.home() / "aaronai" / ".env")
# INPUT_FILE is read by main(); its "documents" entries supply the briefings.
INPUT_FILE = Path.home() / "aaronai" / "briefing_test_v2_results.json"
# OUTPUT_FILE receives the summary and per-document results written by main().
OUTPUT_FILE = Path.home() / "aaronai" / "experiments" / "token_measurement_results.json"
# Model name and response cap passed to client.messages.create() in call_haiku().
MODEL = "claude-haiku-4-5-20251001"
MAX_TOKENS = 1024
# Instruction text placed at the start of both the raw and the briefed message.
EXTRACTION_PROMPT = (
    "Extract entities and their relationships from the document below. "
    "Return ONLY valid JSON with this schema:\n"
    "{\n"
    ' "people": [string],\n'
    ' "organizations": [string],\n'
    ' "locations": [string],\n'
    ' "dates": [string],\n'
    ' "relationships": [{"subject": string, "predicate": string, "object": string}]\n'
    "}\n"
    "No prose, no markdown fences, no commentary. JSON only."
)
def fetch_document_text(pg_conn, source):
    """Rebuild a document from its stored chunks.

    Selects every ``embeddings.document`` row for *source* in id order and
    joins the chunks with blank lines. Returns None when no rows exist.
    """
    cursor = pg_conn.cursor()
    cursor.execute(
        "SELECT document FROM embeddings WHERE source = %s ORDER BY id",
        (source,),
    )
    chunk_rows = cursor.fetchall()
    cursor.close()
    if chunk_rows:
        return "\n\n".join(row[0] for row in chunk_rows)
    return None
def build_raw_message(document_text):
    """Return the extraction prompt followed by the document, with no briefing."""
    sections = (EXTRACTION_PROMPT, f"DOCUMENT:\n{document_text}")
    return "\n\n".join(sections)
def build_briefed_message(briefing, document_text):
    """Return the extraction prompt, the JSON-formatted briefing, then the document."""
    briefing_json = json.dumps(briefing, indent=2)
    sections = (
        EXTRACTION_PROMPT,
        f"BRIEFING (pre-analysis from local model — use to orient):\n{briefing_json}",
        f"DOCUMENT:\n{document_text}",
    )
    return "\n\n".join(sections)
def call_haiku(client, message_text):
    """Send *message_text* as a single user message and return usage details.

    Returns a dict with ``input_tokens``, ``output_tokens``, ``latency_s``
    (wall-clock seconds rounded to 2 places), ``response_text`` (text of the
    first content block, or "" when there is none) and ``stop_reason``.
    """
    started = time.time()
    resp = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        messages=[{"role": "user", "content": message_text}],
    )
    tokens_in = resp.usage.input_tokens
    tokens_out = resp.usage.output_tokens
    elapsed = round(time.time() - started, 2)
    text = resp.content[0].text if resp.content else ""
    return {
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
        "latency_s": elapsed,
        "response_text": text,
        "stop_reason": resp.stop_reason,
    }
def ci_95(values):
    """Return ``(mean, half_width)`` of a normal-approximation 95% interval.

    The half-width is ``1.96 * stdev / sqrt(n)``. With fewer than two values
    the half-width is 0.0, and the mean is 0.0 when *values* is empty.
    """
    n = len(values)
    if n == 0:
        return (0.0, 0.0)
    center = statistics.mean(values)
    if n == 1:
        return (center, 0.0)
    spread = statistics.stdev(values)
    return (center, 1.96 * spread / (n ** 0.5))
def main():
    """Measure input/output token counts with and without a briefing.

    For every successful briefing in ``INPUT_FILE``, rebuilds the document
    text from the database, sends it to the model once without and once with
    the briefing, and records the token usage of both calls. Writes a summary
    plus per-document results to ``OUTPUT_FILE`` and prints the headline
    numbers. Exits with status 1 when the input file or a required
    environment variable is missing.
    """
    if not INPUT_FILE.exists():
        print(f"ERROR: {INPUT_FILE} not found", file=sys.stderr)
        sys.exit(1)
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("ERROR: ANTHROPIC_API_KEY not set", file=sys.stderr)
        sys.exit(1)
    pg_dsn = os.environ.get("PG_DSN")
    if not pg_dsn:
        print("ERROR: PG_DSN not set", file=sys.stderr)
        sys.exit(1)
    client = anthropic.Anthropic(api_key=api_key)
    pg_conn = psycopg2.connect(pg_dsn)
    with open(INPUT_FILE) as f:
        v2_data = json.load(f)
    # Keep only documents whose briefing was generated successfully.
    docs_meta = [
        d for d in v2_data["documents"]
        if d.get("status") == "SUCCESS"
        and d.get("briefing")
    ]
    print(f"Loaded {len(docs_meta)} successful briefings from {INPUT_FILE.name}")
    print(f"Model: {MODEL}")
    print(f"Calls planned: up to {len(docs_meta) * 2}\n")
    results = []
    started_at = datetime.now(timezone.utc).isoformat()
    t_total = time.time()
    for i, doc in enumerate(docs_meta, 1):
        source = doc["source"]
        briefing = doc["briefing"]
        document_text = fetch_document_text(pg_conn, source)
        if not document_text:
            print(f"[{i:02d}/{len(docs_meta)}] {source[:60]} -- SKIP (not in pgvector)")
            results.append({"source": source, "skipped": "not_in_pgvector"})
            continue
        print(f"[{i:02d}/{len(docs_meta)}] {source[:60]}")
        # Each call is attempted independently; a failure is stored as
        # {"error": ...} so the other call's result is still recorded.
        try:
            raw_result = call_haiku(client, build_raw_message(document_text))
        except Exception as e:
            print(f" RAW FAILED: {e}")
            raw_result = {"error": str(e)}
        try:
            briefed_result = call_haiku(client, build_briefed_message(briefing, document_text))
        except Exception as e:
            print(f" BRIEFED FAILED: {e}")
            briefed_result = {"error": str(e)}
        # delta stays None unless both calls returned token counts.
        delta = None
        if "input_tokens" in raw_result and "input_tokens" in briefed_result:
            raw_in = raw_result["input_tokens"]
            briefed_in = briefed_result["input_tokens"]
            raw_out = raw_result["output_tokens"]
            briefed_out = briefed_result["output_tokens"]
            # Positive input_red means the briefed call used fewer input tokens.
            input_red = (raw_in - briefed_in) / raw_in * 100 if raw_in else 0.0
            output_delta = (briefed_out - raw_out) / raw_out * 100 if raw_out else 0.0
            delta = {
                "input_reduction_pct": round(input_red, 2),
                "output_delta_pct": round(output_delta, 2),
                "raw_input_tokens": raw_in,
                "briefed_input_tokens": briefed_in,
                "raw_output_tokens": raw_out,
                "briefed_output_tokens": briefed_out,
            }
            print(
                f" in: {raw_in} -> {briefed_in} ({input_red:+.1f}%) | "
                f"out: {raw_out} -> {briefed_out}"
            )
        results.append({
            "source": source,
            "raw": raw_result,
            "briefed": briefed_result,
            "delta": delta,
        })
    pg_conn.close()
    total_elapsed = round(time.time() - t_total, 1)
    # Only documents with a complete raw/briefed pair enter the statistics.
    valid = [r for r in results if r.get("delta") is not None]
    skipped = [r for r in results if r.get("skipped")]
    reductions = [r["delta"]["input_reduction_pct"] for r in valid]
    output_deltas = [r["delta"]["output_delta_pct"] for r in valid]
    raw_in_total = sum(r["delta"]["raw_input_tokens"] for r in valid)
    briefed_in_total = sum(r["delta"]["briefed_input_tokens"] for r in valid)
    raw_out_total = sum(r["delta"]["raw_output_tokens"] for r in valid)
    briefed_out_total = sum(r["delta"]["briefed_output_tokens"] for r in valid)
    # Per-million-token rates used for the cost figures below
    # (presumably USD list prices — confirm against current pricing).
    HAIKU_IN = 1.0
    HAIKU_OUT = 5.0
    raw_cost = (raw_in_total * HAIKU_IN + raw_out_total * HAIKU_OUT) / 1_000_000
    briefed_cost = (briefed_in_total * HAIKU_IN + briefed_out_total * HAIKU_OUT) / 1_000_000
    mean_red, ci_half = ci_95(reductions)
    mean_out_delta, _ = ci_95(output_deltas)
    summary = {
        "experiment": "005",
        "title": "Actual API Token Measurement",
        "started_at": started_at,
        "completed_at": datetime.now(timezone.utc).isoformat(),
        "model": MODEL,
        "extraction_prompt": EXTRACTION_PROMPT,
        "n_documents_attempted": len(docs_meta),
        "n_skipped_not_in_pgvector": len(skipped),
        "n_valid_pairs": len(valid),
        "n_failed": len(docs_meta) - len(valid) - len(skipped),
        "total_elapsed_s": total_elapsed,
        "input_token_reduction": {
            "mean_pct": round(mean_red, 2),
            "ci_95_half_width_pct": round(ci_half, 2),
            "median_pct": round(statistics.median(reductions), 2) if reductions else None,
            "min_pct": round(min(reductions), 2) if reductions else None,
            "max_pct": round(max(reductions), 2) if reductions else None,
            "stdev_pct": round(statistics.stdev(reductions), 2) if len(reductions) > 1 else 0.0,
        },
        "output_token_delta": {"mean_pct": round(mean_out_delta, 2)},
        "totals": {
            "raw_input_tokens": raw_in_total,
            "briefed_input_tokens": briefed_in_total,
            "raw_output_tokens": raw_out_total,
            "briefed_output_tokens": briefed_out_total,
            "raw_cost_usd": round(raw_cost, 4),
            "briefed_cost_usd": round(briefed_cost, 4),
            "savings_usd": round(raw_cost - briefed_cost, 4),
        },
        # 42.0 is the modeled estimate this experiment is checking.
        "comparison_to_v2_estimate": {
            "v2_modeled_reduction_pct": 42.0,
            "measured_mean_reduction_pct": round(mean_red, 2),
            "delta_pct_points": round(mean_red - 42.0, 2),
        },
        "results": results,
    }
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_FILE, "w") as f:
        json.dump(summary, f, indent=2)
    print()
    print("=" * 60)
    print(f"DONE — {len(valid)}/{len(docs_meta)} valid pairs in {total_elapsed}s")
    if skipped:
        print(f"Skipped (not in pgvector): {len(skipped)}")
    print(f"Mean input token reduction: {mean_red:.2f}% +/- {ci_half:.2f}% (95% CI)")
    print(f"V2 modeled estimate: 42.0% | delta: {mean_red - 42.0:+.2f} pts")
    print(f"Mean output token delta: {mean_out_delta:+.2f}%")
    print(f"Total cost: ${raw_cost + briefed_cost:.4f}")
    print(f"Results: {OUTPUT_FILE}")
if __name__ == "__main__":
main()
+43 -8
View File
@@ -299,22 +299,57 @@ class IngestHandler(FileSystemEventHandler):
self.pending = False self.pending = False
self.last_event = 0 self.last_event = 0
def on_any_event(self, event): def _should_ignore(self, path: Path) -> bool:
if path.name.startswith((".", "~$")):
return True
if "Admin/Backups" in str(path) or "Backups" in path.parts:
return True
if "Journal/Media" in str(path):
return True
return False
def on_created(self, event):
if event.is_directory: if event.is_directory:
return return
path = Path(event.src_path) path = Path(event.src_path)
if path.suffix.lower() not in SUPPORTED: if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
return return
if path.name.startswith((".", "~$")): log.info(f"Event: created {path}")
self.pending = True
self.last_event = time.time()
def on_modified(self, event):
if event.is_directory:
return return
if "Admin/Backups" in str(path) or "Backups" in path.parts: path = Path(event.src_path)
if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
return return
if "Journal/Media" in str(path): log.info(f"Event: modified {path}")
self.pending = True
self.last_event = time.time()
def on_moved(self, event):
if event.is_directory:
return return
if event.event_type not in ("modified", "created", "moved"): # Nextcloud WebDAV writes .part temp files then renames to final path.
# src_path is the .part file; dest_path is the final filename.
dest = Path(event.dest_path)
if dest.suffix.lower() not in SUPPORTED or self._should_ignore(dest):
return return
log.info(f"Event: {event.event_type} {event.src_path}") log.info(f"Event: moved -> {dest}")
self.pending = True self.pending = True
self.last_event = time.time()
def on_closed(self, event):
# FileClosedEvent fires on the final file after Nextcloud completes write.
# Belt-and-suspenders catch for any write pattern not caught by on_moved.
if event.is_directory:
return
path = Path(event.src_path)
if path.suffix.lower() not in SUPPORTED or self._should_ignore(path):
return
log.info(f"Event: closed {path}")
self.pending = True
self.last_event = time.time() self.last_event = time.time()