1718 lines
51 KiB
JSON
1718 lines
51 KiB
JSON
{
|
|
"meta": {
|
|
"model": "mistral",
|
|
"version": "v2",
|
|
"sample_size": 50,
|
|
"started": "2026-04-28T02:50:20.797376",
|
|
"completed": "2026-04-28T03:08:37.169551",
|
|
"total_elapsed_seconds": 1096.7,
|
|
"avg_seconds_per_doc": 21.9
|
|
},
|
|
"documents": [
|
|
{
|
|
"id": "5ee0b3bb_0",
|
|
"source": "00_Syllabus.docx",
|
|
"content_hash": "848c971c",
|
|
"content_length": 2273,
|
|
"cleaned_content_length": 2273,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 36.3,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 568,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 26.0
|
|
}
|
|
},
|
|
{
|
|
"id": "4451e0d5_1",
|
|
"source": "01_ALL_Overview of AM and 3DP_v3.pptx",
|
|
"content_hash": "1e3ff98f",
|
|
"content_length": 2167,
|
|
"cleaned_content_length": 2167,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 33.7,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 542,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 27.0
|
|
}
|
|
},
|
|
{
|
|
"id": "0619cec0_0",
|
|
"source": "01_NURBS Curves.docx",
|
|
"content_hash": "2ac1bb56",
|
|
"content_length": 1401,
|
|
"cleaned_content_length": 1401,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 33.0,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 350,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 36.3
|
|
}
|
|
},
|
|
{
|
|
"id": "d0a3917e_0",
|
|
"source": "02_2D Geometry.docx",
|
|
"content_hash": "5d53f099",
|
|
"content_length": 188,
|
|
"cleaned_content_length": 188,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 10.6,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 47,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 81.0
|
|
}
|
|
},
|
|
{
|
|
"id": "89fed291_0",
|
|
"source": "02_Point of Curves - AARON.docx",
|
|
"content_hash": "864be8ed",
|
|
"content_length": 2116,
|
|
"cleaned_content_length": 2116,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 20.1,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 529,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 27.4
|
|
}
|
|
},
|
|
{
|
|
"id": "2a15be8d_0",
|
|
"source": "02_Point of Curves.docx",
|
|
"content_hash": "4b683753",
|
|
"content_length": 1338,
|
|
"cleaned_content_length": 1338,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 15.8,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 334,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 37.4
|
|
}
|
|
},
|
|
{
|
|
"id": "a2a7a8d3_2",
|
|
"source": "02_PPT_ALL_AM_Technologies_for_3DP_v3.pptx",
|
|
"content_hash": "1c260a03",
|
|
"content_length": 1675,
|
|
"cleaned_content_length": 1675,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 34.3,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 419,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 32.3
|
|
}
|
|
},
|
|
{
|
|
"id": "2b3b1c34_0",
|
|
"source": "03_2D Transformation and Deformation.docx",
|
|
"content_hash": "306dc581",
|
|
"content_length": 418,
|
|
"cleaned_content_length": 418,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 24.7,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 104,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 65.7
|
|
}
|
|
},
|
|
{
|
|
"id": "b425985b_0",
|
|
"source": "03_2D Transformation, Deformation, and Editing-AARON.docx",
|
|
"content_hash": "9f9c422a",
|
|
"content_length": 541,
|
|
"cleaned_content_length": 541,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 12.0,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 135,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 59.7
|
|
}
|
|
},
|
|
{
|
|
"id": "1c78c79f_0",
|
|
"source": "03_Editing Geometry.docx",
|
|
"content_hash": "9491f6cc",
|
|
"content_length": 171,
|
|
"cleaned_content_length": 171,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 10.1,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 43,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 82.4
|
|
}
|
|
},
|
|
{
|
|
"id": "6453f3a8_6",
|
|
"source": "04_ALL_Materials and Their Properties_v3.pptx",
|
|
"content_hash": "d1ff494c",
|
|
"content_length": 2999,
|
|
"cleaned_content_length": 2999,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 33.7,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 750,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 21.1
|
|
}
|
|
},
|
|
{
|
|
"id": "1fd396d4_0",
|
|
"source": "04_Annotations.docx",
|
|
"content_hash": "9b3e57cd",
|
|
"content_length": 737,
|
|
"cleaned_content_length": 737,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 27.0,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 184,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 52.0
|
|
}
|
|
},
|
|
{
|
|
"id": "38d1cf0d_0",
|
|
"source": "05_Entering the third dimension.docx",
|
|
"content_hash": "8f56202a",
|
|
"content_length": 2175,
|
|
"cleaned_content_length": 2175,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 19.2,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 544,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 26.9
|
|
}
|
|
},
|
|
{
|
|
"id": "cfd1ee43_0",
|
|
"source": "05_Making things solid.docx",
|
|
"content_hash": "12634c4c",
|
|
"content_length": 692,
|
|
"cleaned_content_length": 692,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 13.8,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [
|
|
"boilerplate"
|
|
],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 173,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 9,
|
|
"noise_reduction_pct": 5.0,
|
|
"total_reduction_pct": 55.9
|
|
}
|
|
},
|
|
{
|
|
"id": "31c729e4_8",
|
|
"source": "05_PPT_ALL_Machine Technology and Specifications_v3.pptx",
|
|
"content_hash": "bf8daf4b",
|
|
"content_length": 2886,
|
|
"cleaned_content_length": 2886,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 32.0,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 722,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 21.7
|
|
}
|
|
},
|
|
{
|
|
"id": "a8d4d8a4_0",
|
|
"source": "06_3D_Editing.docx",
|
|
"content_hash": "887133dc",
|
|
"content_length": 157,
|
|
"cleaned_content_length": 157,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 23.4,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 39,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 83.6
|
|
}
|
|
},
|
|
{
|
|
"id": "c0d0659e_0",
|
|
"source": "06_Gumball.docx",
|
|
"content_hash": "c46dbc48",
|
|
"content_length": 1980,
|
|
"cleaned_content_length": 1980,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 21.0,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 495,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 28.8
|
|
}
|
|
},
|
|
{
|
|
"id": "8f9d093e_5",
|
|
"source": "06_PPT_ALL_Design Considerations_From CAD to CAM_v3.pptx",
|
|
"content_hash": "2215d29f",
|
|
"content_length": 2945,
|
|
"cleaned_content_length": 2945,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 33.4,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists",
|
|
"tables"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 736,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 21.4
|
|
}
|
|
},
|
|
{
|
|
"id": "4e7db487_0",
|
|
"source": "07_Cube Assignment_2018f.docx",
|
|
"content_hash": "af2f5bab",
|
|
"content_length": 1316,
|
|
"cleaned_content_length": 1316,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 32.0,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 329,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 37.8
|
|
}
|
|
},
|
|
{
|
|
"id": "1a5b6da3_0",
|
|
"source": "07_Make2D.docx",
|
|
"content_hash": "d71c1df4",
|
|
"content_length": 834,
|
|
"cleaned_content_length": 834,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 14.8,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 208,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 49.0
|
|
}
|
|
},
|
|
{
|
|
"id": "97ba28bd_5",
|
|
"source": "07_PPT_ALL_Fabrication Considerations_v3.pptx",
|
|
"content_hash": "7ffb6f57",
|
|
"content_length": 710,
|
|
"cleaned_content_length": 710,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 26.6,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"tables",
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 178,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 53.0
|
|
}
|
|
},
|
|
{
|
|
"id": "0892b9fa_8",
|
|
"source": "08_PPT_ALL_PostProcessing for FDM and PolyJet_v3.pptx",
|
|
"content_hash": "139aa114",
|
|
"content_length": 365,
|
|
"cleaned_content_length": 365,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "presentation",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 12.0,
|
|
"briefing": {
|
|
"document_type": "presentation",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 91,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 68.7
|
|
}
|
|
},
|
|
{
|
|
"id": "9446c72b_0",
|
|
"source": "08_Printing_Technicals.docx",
|
|
"content_hash": "365e53a6",
|
|
"content_length": 1310,
|
|
"cleaned_content_length": 1310,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 31.2,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 328,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 37.9
|
|
}
|
|
},
|
|
{
|
|
"id": "44b7a630_0",
|
|
"source": "09_Tolerance Test Part.docx",
|
|
"content_hash": "f6a14d20",
|
|
"content_length": 817,
|
|
"cleaned_content_length": 817,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 14.0,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 204,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 49.5
|
|
}
|
|
},
|
|
{
|
|
"id": "4a74de83_0",
|
|
"source": "09_Tolerance Test Part.pdf",
|
|
"content_hash": "7f3106a9",
|
|
"content_length": 1049,
|
|
"cleaned_content_length": 1049,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 15.9,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 262,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 43.3
|
|
}
|
|
},
|
|
{
|
|
"id": "aa807935_0",
|
|
"source": "10 Good Things about Aaron for DSI.docx",
|
|
"content_hash": "ff5081e9",
|
|
"content_length": 1126,
|
|
"cleaned_content_length": 1126,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 17.4,
|
|
"briefing": {
|
|
"document_type": "unknown",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 282,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 41.5
|
|
}
|
|
},
|
|
{
|
|
"id": "90248749_1",
|
|
"source": "10_Moving Parts.docx",
|
|
"content_hash": "3c2e28b9",
|
|
"content_length": 218,
|
|
"cleaned_content_length": 218,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 11.1,
|
|
"briefing": {
|
|
"document_type": "technical_doc",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 54,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 78.6
|
|
}
|
|
},
|
|
{
|
|
"id": "958e5aac_0",
|
|
"source": "1119345.pdf",
|
|
"content_hash": "af7da5db",
|
|
"content_length": 1745,
|
|
"cleaned_content_length": 1745,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 30.7,
|
|
"briefing": {
|
|
"document_type": "invoice",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists",
|
|
"tables"
|
|
],
|
|
"noise_signals": [
|
|
"page_numbers",
|
|
"formatting_artifacts",
|
|
"encoding_artifacts"
|
|
],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 436,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 65,
|
|
"noise_reduction_pct": 15.0,
|
|
"total_reduction_pct": 41.7
|
|
}
|
|
},
|
|
{
|
|
"id": "adbfb7b8_1",
|
|
"source": "2016 - DDF 205 - CAD I Syllabus.pdf",
|
|
"content_hash": "466b3184",
|
|
"content_length": 2161,
|
|
"cleaned_content_length": 2161,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 20.0,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 540,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 27.0
|
|
}
|
|
},
|
|
{
|
|
"id": "86543785_1",
|
|
"source": "2017 - DDF 220 - Intro to Computational Media Syllabus.pdf",
|
|
"content_hash": "3ad85610",
|
|
"content_length": 2224,
|
|
"cleaned_content_length": 2224,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 19.9,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 556,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 26.5
|
|
}
|
|
},
|
|
{
|
|
"id": "4e199480_1",
|
|
"source": "2018 - DDF 205 - CAD I Syllabus.pdf",
|
|
"content_hash": "2632e62b",
|
|
"content_length": 2618,
|
|
"cleaned_content_length": 2618,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 19.3,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 654,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 23.4
|
|
}
|
|
},
|
|
{
|
|
"id": "56b61c68_3",
|
|
"source": "2019-2020 Research and Creative Projects Awards Guidelines.FINAL.pdf",
|
|
"content_hash": "1a4e890b",
|
|
"content_length": 2228,
|
|
"cleaned_content_length": 2228,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 20.5,
|
|
"briefing": {
|
|
"document_type": "unknown",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 557,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 26.4
|
|
}
|
|
},
|
|
{
|
|
"id": "9ed5c43e_2",
|
|
"source": "2019 - DDF 305 - Materials Syllabus.pdf",
|
|
"content_hash": "c0521ba2",
|
|
"content_length": 1842,
|
|
"cleaned_content_length": 1842,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 21.7,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"numbered_lists",
|
|
"tables"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 460,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 30.3
|
|
}
|
|
},
|
|
{
|
|
"id": "2e264727_2",
|
|
"source": "2020 - DDF 220 - Intro to Computational Media Syllabus.pdf",
|
|
"content_hash": "fe3ca5be",
|
|
"content_length": 2580,
|
|
"cleaned_content_length": 2580,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 19.4,
|
|
"briefing": {
|
|
"document_type": "academic_pdf",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"tables",
|
|
"headings"
|
|
],
|
|
"noise_signals": [
|
|
"line_numbers"
|
|
],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 645,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 32,
|
|
"noise_reduction_pct": 5.0,
|
|
"total_reduction_pct": 27.5
|
|
}
|
|
},
|
|
{
|
|
"id": "c0cd3599_3",
|
|
"source": "2021 - DDF 320 - Design Intents Syllabus.pdf",
|
|
"content_hash": "588d34a3",
|
|
"content_length": 1560,
|
|
"cleaned_content_length": 1560,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 22.8,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": true,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 390,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 33.9
|
|
}
|
|
},
|
|
{
|
|
"id": "9ea5656f_2",
|
|
"source": "2023 Faculty Report Aaron Nelson.docx",
|
|
"content_hash": "fd68d021",
|
|
"content_length": 2698,
|
|
"cleaned_content_length": 2698,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 19.2,
|
|
"briefing": {
|
|
"document_type": "academic_pdf",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 674,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 22.9
|
|
}
|
|
},
|
|
{
|
|
"id": "33aae3e5_2",
|
|
"source": "2023 Faculty Report Template.docx",
|
|
"content_hash": "c2d50031",
|
|
"content_length": 2100,
|
|
"cleaned_content_length": 2100,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": null,
|
|
"was_pre_classified": false,
|
|
"elapsed_seconds": 18.9,
|
|
"briefing": {
|
|
"document_type": "syllabus",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 525,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 27.6
|
|
}
|
|
},
|
|
{
|
|
"id": "bf155f9f_0",
|
|
"source": "2026-04-26-22-44-voice.md",
|
|
"content_hash": "41cc3d28",
|
|
"content_length": 165,
|
|
"cleaned_content_length": 72,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "voice_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 21.0,
|
|
"briefing": {
|
|
"document_type": "voice_capture",
|
|
"primary_language": "en",
|
|
"density": "low",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 18,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 91.7
|
|
}
|
|
},
|
|
{
|
|
"id": "5c9f5ad5_0",
|
|
"source": "2026-04-26-22-52-voice.md",
|
|
"content_hash": "0ed1efba",
|
|
"content_length": 171,
|
|
"cleaned_content_length": 78,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "voice_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 8.9,
|
|
"briefing": {
|
|
"document_type": "voice_capture",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 20,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 91.1
|
|
}
|
|
},
|
|
{
|
|
"id": "8bc956ad_0",
|
|
"source": "2026-04-26-23-04-voice.md",
|
|
"content_hash": "c455ef44",
|
|
"content_length": 931,
|
|
"cleaned_content_length": 838,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "voice_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 14.9,
|
|
"briefing": {
|
|
"document_type": "voice_capture",
|
|
"primary_language": "en",
|
|
"density": "low",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [
|
|
"formatting_artifacts"
|
|
],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 210,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 10,
|
|
"noise_reduction_pct": 5.0,
|
|
"total_reduction_pct": 51.4
|
|
}
|
|
},
|
|
{
|
|
"id": "af176130_0",
|
|
"source": "2026-04-26-lucid-1.md",
|
|
"content_hash": "d9c51a1c",
|
|
"content_length": 2444,
|
|
"cleaned_content_length": 2302,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "dream_lucid",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 31.7,
|
|
"briefing": {
|
|
"document_type": "dream_lucid",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 576,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 25.8
|
|
}
|
|
},
|
|
{
|
|
"id": "52114711_0",
|
|
"source": "2026-04-26-lucid.md",
|
|
"content_hash": "4c5fb648",
|
|
"content_length": 2437,
|
|
"cleaned_content_length": 2295,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "dream_lucid",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 19.0,
|
|
"briefing": {
|
|
"document_type": "dream_lucid",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"headings",
|
|
"bullet_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 574,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 25.8
|
|
}
|
|
},
|
|
{
|
|
"id": "1bf832a0_0",
|
|
"source": "2026-04-26-nrem-1.md",
|
|
"content_hash": "1ad1e9c1",
|
|
"content_length": 1586,
|
|
"cleaned_content_length": 1548,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "dream_nrem",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 33.7,
|
|
"briefing": {
|
|
"document_type": "dream_nrem",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"bullet_lists",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 387,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 34.1
|
|
}
|
|
},
|
|
{
|
|
"id": "a16d6571_0",
|
|
"source": "2026-04-26-nrem.md",
|
|
"content_hash": "1714ccc0",
|
|
"content_length": 1638,
|
|
"cleaned_content_length": 1600,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "dream_nrem",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 19.6,
|
|
"briefing": {
|
|
"document_type": "dream_nrem",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"bullet_lists",
|
|
"numbered_lists"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 400,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 33.3
|
|
}
|
|
},
|
|
{
|
|
"id": "b696802f_0",
|
|
"source": "2026-04-27-04-34-image.md",
|
|
"content_hash": "3cce200d",
|
|
"content_length": 2027,
|
|
"cleaned_content_length": 1853,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "image_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 31.5,
|
|
"briefing": {
|
|
"document_type": "image_capture",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": true,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [
|
|
"images"
|
|
],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 463,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 30.2
|
|
}
|
|
},
|
|
{
|
|
"id": "6bc36d6f_0",
|
|
"source": "2026-04-27-04-36-image.md",
|
|
"content_hash": "29717d0c",
|
|
"content_length": 1755,
|
|
"status": "FAILED",
|
|
"pre_classified_type": "image_capture",
|
|
"error": "JSON_ERROR: Expecting property name enclosed in double quotes: line 11 column 36 (char 308) | raw: {\n \"document_type\": \"image_capture\",\n \"primary_language\": \"en\",\n \"density\": \"high\",\n \"has_proper_nouns\": true,\n \"has_dates\": false,\n \"has_numeric_data\": false,\n \"has_institutional_language\": fa",
|
|
"elapsed_seconds": 19.5
|
|
},
|
|
{
|
|
"id": "8b7ed0da_0",
|
|
"source": "2026-04-27-04-41-image.md",
|
|
"content_hash": "47a1f451",
|
|
"content_length": 2148,
|
|
"status": "FAILED",
|
|
"pre_classified_type": "image_capture",
|
|
"error": "JSON_ERROR: Expecting property name enclosed in double quotes: line 11 column 38 (char 310) | raw: {\n \"document_type\": \"image_capture\",\n \"primary_language\": \"en\",\n \"density\": \"medium\",\n \"has_proper_nouns\": true,\n \"has_dates\": false,\n \"has_numeric_data\": true,\n \"has_institutional_language\": t",
|
|
"elapsed_seconds": 19.3
|
|
},
|
|
{
|
|
"id": "700d4582_0",
|
|
"source": "2026-04-27-06-21-image.md",
|
|
"content_hash": "b143e6fc",
|
|
"content_length": 1643,
|
|
"cleaned_content_length": 1469,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "image_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 18.3,
|
|
"briefing": {
|
|
"document_type": "image_capture",
|
|
"primary_language": "en",
|
|
"density": "low",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": false,
|
|
"has_technical_terms": false,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 367,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 35.3
|
|
}
|
|
},
|
|
{
|
|
"id": "31317444_0",
|
|
"source": "2026-04-27-19-04-image.md",
|
|
"content_hash": "8bd62d02",
|
|
"content_length": 1767,
|
|
"cleaned_content_length": 1593,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "image_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 18.4,
|
|
"briefing": {
|
|
"document_type": "image_capture",
|
|
"primary_language": "en",
|
|
"density": "medium",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [],
|
|
"extraction_priority": "partial"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 398,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 33.4
|
|
}
|
|
},
|
|
{
|
|
"id": "bc4bffcd_0",
|
|
"source": "2026-04-27-20-18-image.md",
|
|
"content_hash": "c33f8f22",
|
|
"content_length": 1856,
|
|
"cleaned_content_length": 1682,
|
|
"status": "SUCCESS",
|
|
"pre_classified_type": "image_capture",
|
|
"was_pre_classified": true,
|
|
"elapsed_seconds": 18.8,
|
|
"briefing": {
|
|
"document_type": "image_capture",
|
|
"primary_language": "en",
|
|
"density": "high",
|
|
"has_proper_nouns": true,
|
|
"has_dates": false,
|
|
"has_numeric_data": false,
|
|
"has_institutional_language": true,
|
|
"has_technical_terms": true,
|
|
"likely_has_named_entities": true,
|
|
"structure_signals": [],
|
|
"noise_signals": [],
|
|
"extraction_priority": "full"
|
|
},
|
|
"token_reduction_estimate": {
|
|
"original_tokens_approx": 420,
|
|
"orientation_tokens_saved": 200,
|
|
"noise_tokens_saved": 0,
|
|
"noise_reduction_pct": 0.0,
|
|
"total_reduction_pct": 32.2
|
|
}
|
|
}
|
|
],
|
|
"summary": {
|
|
"total": 50,
|
|
"success": 48,
|
|
"failed": 2,
|
|
"success_rate": 96.0,
|
|
"pre_classified_by_rule": 20,
|
|
"classified_by_model": 30,
|
|
"extraction_priority_breakdown": {
|
|
"full": 38,
|
|
"partial": 10,
|
|
"skip": 0
|
|
},
|
|
"avg_token_reduction_pct": 42.0,
|
|
"total_elapsed_seconds": 1096.7,
|
|
"avg_seconds_per_doc": 21.9,
|
|
"projected_50_doc_minutes": 18.2,
|
|
"approach_viable": true
|
|
}
|
|
} |