corpus_integrity.py: write unreadable files to ingest_failures for UI visibility
This commit is contained in:
@@ -195,6 +195,21 @@ def run_reconciliation(fix=False):
|
||||
print(f" Queued: {finfo['source']}")
|
||||
else:
|
||||
print(f" Skipped (unreadable): {finfo['source']}")
|
||||
try:
|
||||
pg = get_pg()
|
||||
cur = pg.cursor()
|
||||
cur.execute("""
|
||||
INSERT INTO ingest_failures (source, filepath, error, retry_count, first_failed_at, last_failed_at)
|
||||
VALUES (%s, %s, %s, 2, NOW(), NOW())
|
||||
ON CONFLICT (source) DO UPDATE SET
|
||||
error = EXCLUDED.error,
|
||||
last_failed_at = NOW()
|
||||
""", (finfo["source"], finfo["filepath"],
|
||||
"Empty text — likely scanned, encrypted, or corrupt. Requires manual review or OCR."))
|
||||
pg.commit()
|
||||
pg.close()
|
||||
except Exception as e:
|
||||
print(f" WARNING: could not record failure: {e}")
|
||||
print()
|
||||
|
||||
report = {
|
||||
|
||||
Reference in New Issue
Block a user