diff --git a/scripts/encoding.py b/scripts/encoding.py
index 3e7092c..41e06b6 100644
--- a/scripts/encoding.py
+++ b/scripts/encoding.py
@@ -12,6 +12,7 @@ Replaces four separate extract reimplementations and two extract-chunk-embed pat
 import hashlib
 import json
 import logging
+import re
 from pathlib import Path
 
 from docx import Document as DocxDocument
@@ -24,6 +25,62 @@ SUPPORTED = {".docx", ".pdf", ".pptx", ".txt", ".md"}
 DEFAULT_CHUNK_SIZE = 500
 DEFAULT_CHUNK_OVERLAP = 50
 
+_BOLD_KV_RE = re.compile(r"^\*\*[\w +/-]+?:\*\*")
+
+
+def _strip_md_frontmatter(text: str) -> str:
+    """Strip a leading frontmatter block from markdown, if present.
+
+    Recognizes two formats:
+      - YAML-style: the first non-empty line is `---` and a later `---` line
+        closes the block. Only considered when no H1 precedes it, so a `---`
+        horizontal rule that follows an H1 is left alone.
+      - Capture-style: optional H1, then `**key:** value` lines (blank lines
+        allowed) closed by `---`. The H1 is kept; the key/value block and its
+        separator are removed. Any other line before the `---` aborts.
+
+    A leading byte-order mark is ignored while detecting frontmatter.
+
+    Args:
+        text: Full markdown source.
+
+    Returns:
+        `text` unchanged when no complete frontmatter block is found.
+        Otherwise the H1 (if any), a blank line, and the body: lines joined
+        with newlines, leading empty lines dropped, final line terminator
+        dropped. An H1 with no remaining body is returned on its own.
+    """
+    # U+FEFF is not whitespace to str.strip(), so drop a BOM before scanning.
+    lines = text.lstrip("\ufeff").splitlines()
+    n = len(lines)
+    i = 0
+    while i < n and not lines[i].strip():
+        i += 1
+    heading = None
+    if i < n and lines[i].startswith("# "):
+        heading = lines[i]
+        i += 1
+        while i < n and not lines[i].strip():
+            i += 1
+    if i >= n:
+        return text
+    first = lines[i].strip()
+    yaml_style = heading is None and first == "---"
+    if not yaml_style and not _BOLD_KV_RE.match(first):
+        return text
+    j = i + 1  # line i is the opening `---` or the first key/value line
+    while j < n:
+        s = lines[j].strip()
+        if s == "---":
+            break
+        if not yaml_style and s and not _BOLD_KV_RE.match(s):
+            return text  # body text before any `---`: not frontmatter
+        j += 1
+    else:
+        return text  # no closing `---`
+    body = "\n".join(lines[j + 1 :]).lstrip("\n")
+    return "\n\n".join(part for part in (heading, body) if part)
+
 
 def _docx_cell_paragraphs(cell):
     yield from (p for p in cell.paragraphs if p.text.strip())
@@ -89,7 +146,10 @@ def extract_text(filepath: Path) -> str:
                         parts.append(notes)
             return "\n".join(parts)
         elif suffix in {".txt", ".md"}:
-            return filepath.read_text(encoding="utf-8", errors="ignore")
+            text = filepath.read_text(encoding="utf-8", errors="ignore")
+            if suffix == ".md":
+                return _strip_md_frontmatter(text)
+            return text
     except Exception as e:
         log.warning(f"Text extraction failed for {filepath.name}: {e}")
     return ""