diff --git a/lib/db.py b/lib/db.py
index f28f295..af06d37 100644
--- a/lib/db.py
+++ b/lib/db.py
@@ -9,7 +9,7 @@ from . import config
 
 logger = logging.getLogger("pipeline.db")
 
-SCHEMA_VERSION = 25
+SCHEMA_VERSION = 26
 
 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -35,6 +35,15 @@ CREATE TABLE IF NOT EXISTS sources (
     feedback TEXT,
     -- eval feedback for re-extraction (JSON)
     cost_usd REAL DEFAULT 0,
+    -- v26: provenance — publisher (news org / venue) + content author.
+    -- publisher_id references publishers(id) when source is from a known org.
+    -- original_author_handle references contributors(handle) when author is in our system.
+    -- original_author is free-text fallback ("Kim et al.", "Robin Hanson") — not credit-bearing.
+    publisher_id INTEGER REFERENCES publishers(id),
+    content_type TEXT,
+    -- article | paper | tweet | conversation | self_authored | webpage | podcast
+    original_author TEXT,
+    original_author_handle TEXT REFERENCES contributors(handle),
     created_at TEXT DEFAULT (datetime('now')),
     updated_at TEXT DEFAULT (datetime('now'))
 );
@@ -207,6 +216,33 @@ CREATE TABLE IF NOT EXISTS contributor_aliases (
     created_at TEXT DEFAULT (datetime('now'))
 );
 CREATE INDEX IF NOT EXISTS idx_aliases_canonical ON contributor_aliases(canonical);
+
+-- Publishers: news orgs, academic venues, social platforms. NOT contributors — these
+-- provide metadata/provenance for sources, never earn leaderboard credit. Separating
+-- these from contributors prevents CNBC/SpaceNews from dominating the leaderboard.
+-- (Apr 24 Cory directive: "only credit the original source if its on X or tg")
+CREATE TABLE IF NOT EXISTS publishers (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    name TEXT NOT NULL UNIQUE,
+    kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
+    url_pattern TEXT,
+    created_at TEXT DEFAULT (datetime('now'))
+);
+CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
+CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
+
+-- Multi-platform identity: one contributor, many handles. Enables the leaderboard to
+-- unify @thesensatore (X) + thesensatore (TG) + thesensatore@github into one person.
+-- Writers check this table after resolving aliases to find canonical contributor handle.
+CREATE TABLE IF NOT EXISTS contributor_identities (
+    contributor_handle TEXT NOT NULL,
+    platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
+    platform_handle TEXT NOT NULL,
+    verified INTEGER DEFAULT 0,
+    created_at TEXT DEFAULT (datetime('now')),
+    PRIMARY KEY (platform, platform_handle)
+);
+CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
 """
 
 
@@ -764,6 +800,51 @@ def migrate(conn: sqlite3.Connection):
         conn.commit()
         logger.info("Migration v25: patched kind='agent' for pipeline handle")
 
+    if current < 26:
+        # Add publishers + contributor_identities tables plus nullable provenance
+        # columns on sources. Non-breaking — no existing data moved. Classification
+        # into publishers happens via a separate script (scripts/classify-contributors.py)
+        # with Cory-reviewed seed list. CHECK constraint on contributors.kind deferred
+        # to v27 after classification completes. (Apr 24 Cory directive: "fix schema,
+        # don't filter output" — separate contributors from publishers at the data layer.)
+        conn.executescript("""
+            CREATE TABLE IF NOT EXISTS publishers (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT NOT NULL UNIQUE,
+                kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
+                url_pattern TEXT,
+                created_at TEXT DEFAULT (datetime('now'))
+            );
+            CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
+            CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
+
+            CREATE TABLE IF NOT EXISTS contributor_identities (
+                contributor_handle TEXT NOT NULL,
+                platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
+                platform_handle TEXT NOT NULL,
+                verified INTEGER DEFAULT 0,
+                created_at TEXT DEFAULT (datetime('now')),
+                PRIMARY KEY (platform, platform_handle)
+            );
+            CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
+        """)
+        # Extend sources with provenance columns. ALTER TABLE ADD COLUMN is
+        # idempotent-safe via try/except because SQLite doesn't support IF NOT EXISTS
+        # on column adds.
+        for col_sql in (
+            "ALTER TABLE sources ADD COLUMN publisher_id INTEGER REFERENCES publishers(id)",
+            "ALTER TABLE sources ADD COLUMN content_type TEXT",
+            "ALTER TABLE sources ADD COLUMN original_author TEXT",
+            "ALTER TABLE sources ADD COLUMN original_author_handle TEXT REFERENCES contributors(handle)",
+        ):
+            try:
+                conn.execute(col_sql)
+            except sqlite3.OperationalError as e:
+                if "duplicate column" not in str(e).lower():
+                    raise
+        conn.commit()
+        logger.info("Migration v26: added publishers + contributor_identities tables + sources provenance columns")
+
     if current < SCHEMA_VERSION:
         conn.execute(
             "INSERT OR REPLACE INTO schema_version (version) VALUES (?)",
diff --git a/scripts/classify-contributors.py b/scripts/classify-contributors.py
new file mode 100644
index 0000000..80299ae
--- /dev/null
+++ b/scripts/classify-contributors.py
@@ -0,0 +1,366 @@
+#!/usr/bin/env python3
+"""Classify `contributors` rows into {keep_person, keep_agent, move_to_publisher, delete_garbage}.
+
+Reads current contributors table, proposes reclassification per v26 schema design:
+  - Real humans + Pentagon agents stay in contributors (kind='person'|'agent')
+  - News orgs, publications, venues move to publishers table (new v26)
+  - Multi-word hyphenated garbage (parsing artifacts) gets deleted
+  - Their contribution_events are handled per category (with --apply):
+      * Publishers: kept for audit; DELETEd only with --delete-events (orgs shouldn't have credit)
+      * Garbage: kept for audit; DELETEd only with --delete-events (bogus data)
+      * Persons/agents: keep events untouched
+
+Classification is heuristic — uses explicit allowlists + regex patterns + length gates.
+Ambiguous cases default to 'review_needed' (human decision).
+
+Usage:
+  python3 scripts/classify-contributors.py              # dry-run analysis + report
+  python3 scripts/classify-contributors.py --apply      # write changes (add --delete-events to also purge events)
+  python3 scripts/classify-contributors.py --show <handle>  # inspect a single row
+
+Writes to pipeline.db only. Does NOT modify claim files.
+"""
+import argparse
+import os
+import re
+import sqlite3
+import sys
+from collections import Counter
+from pathlib import Path
+
+DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")  # $PIPELINE_DB overrides the default path
+
+# Pentagon agents: kind='agent'. Authoritative list; classify() matches it case-insensitively.
+PENTAGON_AGENTS = frozenset({
+    "rio", "leo", "theseus", "vida", "clay", "astra",
+    "oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
+    "pipeline",
+})
+
+# Publisher/news-org handles seen in current contributors table. Hand-curated by
+# inspection — add to it as new orgs appear. Every entry in this set is stored
+# with publishers.kind='news', including the law-firm/research-org-looking names.
+PUBLISHERS_NEWS = {
+    # News outlets / brands
+    "cnbc", "al-jazeera", "axios", "bloomberg", "reuters", "bettorsinsider",
+    "fortune", "techcrunch", "coindesk", "coindesk-staff", "coindesk-research",
+    "coindesk research", "coindesk staff",
+    "defense-one", "thedefensepost", "theregister", "the-intercept",
+    "the-meridiem", "variety", "variety-staff", "variety staff", "spacenews",
+    "nasaspaceflight", "thedonkey", "insidedefense", "techpolicypress",
+    "morganlewis", "casinoorg", "deadline", "animationmagazine",
+    "defensepost", "casino-org", "casino.org",
+    "air & space forces magazine", "ieee spectrum", "techcrunch-staff",
+    "blockworks", "blockworks-staff", "decrypt", "ainvest", "banking-dive", "banking dive",
+    "cset-georgetown", "cset georgetown",
+    "kff", "kff-health-news", "kff health news", "kff-health-news---cbo",
+    "kff-health-news-/-cbo", "kff health news / cbo", "kffhealthnews",
+    "bloomberg-law",
+    "norton-rose-fulbright", "norton rose fulbright",
+    "defence-post", "the-defensepost",
+    "wilmerhale", "mofo", "sciencedirect",
+    "yogonet", "csr", "aisi-uk", "aisi", "aisi_gov", "rand",
+    "armscontrol", "eclinmed", "solana-compass", "solana compass",
+    "pmc11919318", "pmc11780016",
+    "healthverity", "natrium", "form-energy",
+    "courtlistener", "curtis-schiff", "curtis-schiff-prediction-markets",
+    "prophetx", "techpolicypress-staff",
+    "npr", "venturebeat", "geekwire", "payloadspace", "the-ankler",
+    "theankler", "tubefilter", "emarketer", "dagster",
+    "numerai",  # fund/project brand, not person
+    "psl", "multistate",
+}
+PUBLISHERS_ACADEMIC = {  # catch-all set: every entry below is stored with publishers.kind='academic'
+    # Academic orgs, labs, papers, journals, institutions
+    "arxiv", "metr", "metr_evals", "apollo-research", "apollo research", "apolloresearch",
+    "jacc-study-authors", "jacc-data-report-authors",
+    "anthropic-fellows-program", "anthropic-fellows",
+    "anthropic-fellows-/-alignment-science-team", "anthropic-research",
+    "jmir-2024", "jmir 2024",
+    "oettl-et-al.,-journal-of-experimental-orthopaedics",
+    "oettl et al., journal of experimental orthopaedics",
+    "jacc", "nct06548490", "pmc",
+    "conitzer-et-al.-(2024)", "aquino-michaels-2026", "pan-et-al.",
+    "pan-et-al.-'natural-language-agent-harnesses'",
+    "stanford", "stanford-meta-harness",
+    "hendershot", "annals-im",
+    "nellie-liang,-brookings-institution", "nellie liang, brookings institution",
+    "penn-state", "american-heart-association", "american heart association",
+    "molt_cornelius", "molt-cornelius",
+    # Companies / labs / brand-orgs (not specific humans)
+    "anthropic", "anthropicai", "openai", "nasa", "icrc", "ecri",
+    "epochairesearch", "metadao", "iapam", "icer",
+    "who", "ama", "uspstf", "unknown",  # NOTE(review): "unknown" looks like a placeholder, not an org — confirm
+    "futard.io",  # protocol/platform
+    "oxford-martin-ai-governance-initiative",
+    "oxford-martin-ai-governance",
+    "u.s.-food-and-drug-administration",
+    "jitse-goutbeek,-european-policy-centre",  # cited person+org string → publisher
+    "adepoju-et-al.",  # paper citation
+    # Formal-citation names (Firstname-Lastname or Lastname-et-al) — classified
+    # as academic citations, not reachable contributors. They'd need an @ handle
+    # to get CI credit per Cory's growth-loop design.
+    "senator-elissa-slotkin",
+    "bostrom", "hanson", "kaufmann", "noah-smith", "doug-shapiro",
+    "shayon-sengupta", "shayon sengupta",
+    "robin-hanson", "robin hanson", "eliezer-yudkowsky",
+    "leopold-aschenbrenner", "aschenbrenner",
+    "ramstead", "larsson", "heavey",
+    "dan-slimmon", "van-leeuwaarden", "ward-whitt", "adams",
+    "tamim-ansary", "spizzirri",
+    "dario-amodei",  # formal-citation form (real @ is @darioamodei)
+    "corless", "oxranga", "vlahakis",
+    # Brand/project/DAO tokens — not individuals
+    "areal-dao", "areal", "theiaresearch", "futard-io", "dhrumil",
+    # Classic formal-citation names — famous academics/economists cited by surname.
+    # Reachable via @ handle if/when they join (e.g. Ostrom has no X, Hayek deceased,
+    # Friston has an institutional affiliation not an @ handle we'd track).
+    "clayton-christensen", "hidalgo", "coase", "wiener", "juarrero",
+    "ostrom", "centola", "hayek", "marshall-mcluhan", "blackmore",
+    "knuth", "friston", "aquino-michaels", "conitzer", "bak",
+}
+# NOTE: pseudonymous X handles that MAY be real contributors stay in keep_person:
+#   karpathy, simonw, swyx, metaproph3t, metanallok, mmdhrumil, sjdedic,
+#   ceterispar1bus — these are real X accounts and match Cory's growth loop.
+# They appear without @ prefix because extraction frontmatter didn't normalize.
+# Auto-creating them as contributors tier='cited' is correct (A-path from earlier).
+PUBLISHERS_SOCIAL = {  # the platforms themselves; stored with publishers.kind='social_platform'
+    "x", "twitter", "telegram", "x.com",
+}
+PUBLISHERS_INTERNAL = {  # stored with publishers.kind='internal'
+    "teleohumanity-manifesto", "strategy-session-journal",
+    "living-capital-thesis-development", "attractor-state-historical-backtesting",
+    "web-research-compilation", "architectural-investing",
+    "governance---meritocratic-voting-+-futarchy",  # title artifact
+    "sec-interpretive-release-s7-2026-09-(march-17",  # title artifact
+    "mindstudio",  # tooling/platform, not contributor
+}
+# Flatten the per-kind handle sets into one handle → kind lookup for classify().
+PUBLISHER_KIND_MAP = {}
+for kind_label, handle_set in (
+    ("news", PUBLISHERS_NEWS),
+    ("academic", PUBLISHERS_ACADEMIC),
+    ("social_platform", PUBLISHERS_SOCIAL),
+    ("internal", PUBLISHERS_INTERNAL),
+):
+    for h in handle_set:
+        PUBLISHER_KIND_MAP[h.lower()] = kind_label
+
+
+# Garbage: handles that are clearly parse artifacts, not real names.
+# Pattern: contains parens, special chars, or >50 chars.
+def is_garbage(handle: str) -> bool:
+    """Return True when `handle` looks like a parse artifact, not a real name.
+
+    Garbage: over 50 chars, any special character (one leading "@" is
+    tolerated as a handle prefix), or a run of three or more hyphens.
+    """
+    h = handle.strip()
+    if len(h) > 50:
+        return True
+    body = h[1:] if h.startswith("@") else h  # "@" is legal only as the prefix
+    if re.search(r"[()\[\]<>{}\/\\|@#$%^&*=?!:;\"']", body):
+        return True
+    return "---" in body
+
+
+def classify(handle: str) -> tuple[str, str | None]:
+    """Return (category, publisher_kind) for one contributors.handle value.
+
+    category ∈ {'keep_agent', 'keep_person', 'publisher', 'garbage', 'review_needed'}
+    publisher_kind ∈ {'news','academic','social_platform','internal', None}
+    Allowlists are checked before the garbage test, so a listed publisher
+    whose name contains artifact characters is still moved, not deleted.
+    """
+    raw = handle.strip()
+    h = raw.lower().lstrip("@")
+
+    if h in PENTAGON_AGENTS:
+        return ("keep_agent", None)
+
+    publisher_kind = PUBLISHER_KIND_MAP.get(h)
+    if publisher_kind is not None:
+        return ("publisher", publisher_kind)
+
+    if is_garbage(raw):
+        return ("garbage", None)
+
+    # @-prefixed handles auto-join as tier='cited' (Cory's rule); otherwise accept
+    # short slugs (<=20 chars of alphanum/underscore/hyphen) as plausible people.
+    if raw.startswith("@") or re.match(r"^[a-z0-9][a-z0-9_-]{0,19}$", h):
+        return ("keep_person", None)
+
+    # Everything else: needs human review
+    return ("review_needed", None)
+
+
+def main():
+    """Classify every contributors row, print a report, and optionally apply it.
+
+    Dry-run unless --apply is given; --apply aborts while any row is review_needed.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--apply", action="store_true", help="Write changes to DB")
+    parser.add_argument("--show", type=str, help="Inspect a single handle")
+    parser.add_argument("--delete-events", action="store_true",
+                        help="DELETE contribution_events for publishers+garbage (default: keep for audit)")
+    args = parser.parse_args()
+
+    if not Path(DB_PATH).exists():
+        print(f"ERROR: DB not found at {DB_PATH}", file=sys.stderr)
+        sys.exit(1)
+
+    conn = sqlite3.connect(DB_PATH, timeout=30)
+    conn.row_factory = sqlite3.Row
+
+    # Sanity: publishers table must exist (v26 migration applied)
+    try:
+        conn.execute("SELECT 1 FROM publishers LIMIT 1")
+    except sqlite3.OperationalError:
+        print("ERROR: publishers table missing. Run migration v26 first.", file=sys.stderr)
+        sys.exit(2)
+
+    rows = conn.execute(
+        "SELECT handle, kind, tier, claims_merged FROM contributors ORDER BY claims_merged DESC"
+    ).fetchall()
+
+    if args.show:
+        target = args.show.strip().lower().lstrip("@")
+        for r in rows:
+            if r["handle"].lower().lstrip("@") == target:
+                category, pkind = classify(r["handle"])
+                events_count = conn.execute(
+                    "SELECT COUNT(*) FROM contribution_events WHERE handle = ?",
+                    (r["handle"].lower().lstrip("@"),),
+                ).fetchone()[0]
+                print(f"handle:         {r['handle']}")
+                print(f"current_kind:   {r['kind']}")
+                print(f"current_tier:   {r['tier']}")
+                print(f"claims_merged:  {r['claims_merged']}")
+                print(f"events:         {events_count}")
+                print(f"→ category:     {category}")
+                if pkind:
+                    print(f"→ publisher:    kind={pkind}")
+                return
+        print(f"No match for '{args.show}'")
+        return
+
+    # Classify all
+    buckets: dict[str, list[dict]] = {
+        "keep_agent": [],
+        "keep_person": [],
+        "publisher": [],
+        "garbage": [],
+        "review_needed": [],
+    }
+    for r in rows:
+        category, pkind = classify(r["handle"])
+        buckets[category].append({
+            "handle": r["handle"],
+            "kind_now": r["kind"],
+            "tier": r["tier"],
+            "claims": r["claims_merged"] or 0,
+            "publisher_kind": pkind,
+        })
+
+    print("=== Classification summary ===")
+    for cat, items in buckets.items():
+        print(f"  {cat:18s}  {len(items):5d}")
+
+    print("\n=== Sample of each category ===")
+    for cat, items in buckets.items():
+        print(f"\n--- {cat} (showing up to 10) ---")
+        for item in items[:10]:
+            tag = f" → {item['publisher_kind']}" if item["publisher_kind"] else ""
+            print(f"  {item['handle']:50s} claims={item['claims']:5d}{tag}")
+
+    print("\n=== Full review_needed list ===")
+    for item in buckets["review_needed"]:
+        print(f"  {item['handle']:50s} claims={item['claims']:5d}")
+
+    if not args.apply:
+        print("\n(dry-run — no writes. Re-run with --apply to execute.)")
+        return
+
+    # ── Apply changes ──
+    print("\n=== Applying changes ===")
+    if buckets["review_needed"]:
+        print(f"ABORT: {len(buckets['review_needed'])} rows need human review. Fix classifier before --apply.")
+        sys.exit(3)
+
+    inserted_publishers = 0
+    reclassified_agents = 0
+    deleted_garbage = 0
+    deleted_publisher_rows = 0
+    deleted_events = 0
+
+    # Single transaction — any error raises out of the loops and rolls everything
+    # back, so a contributor row is never deleted unless its publisher insert
+    # (or an existing publishers row of the same name) is in place.
+    try:
+        conn.execute("BEGIN")
+
+        # 1. Insert publishers; rowcount is 0 when INSERT OR IGNORE skips an existing name.
+        for item in buckets["publisher"]:
+            name = item["handle"].strip().lower().lstrip("@")
+            cur = conn.execute(
+                "INSERT OR IGNORE INTO publishers (name, kind) VALUES (?, ?)",
+                (name, item["publisher_kind"]),
+            )
+            inserted_publishers += cur.rowcount
+
+        # 2. Ensure Pentagon agents have kind='agent' (idempotent after v25 patch)
+        for item in buckets["keep_agent"]:
+            cur = conn.execute(
+                "UPDATE contributors SET kind = 'agent' WHERE handle = ?",
+                (item["handle"],),
+            )
+            reclassified_agents += cur.rowcount
+
+        # 3. Delete garbage handles from contributors (and their events)
+        for item in buckets["garbage"]:
+            if args.delete_events:
+                cur = conn.execute(
+                    "DELETE FROM contribution_events WHERE handle = ?",
+                    (item["handle"].lower().lstrip("@"),),
+                )
+                deleted_events += cur.rowcount
+            cur = conn.execute(
+                "DELETE FROM contributors WHERE handle = ?",
+                (item["handle"],),
+            )
+            deleted_garbage += cur.rowcount
+
+        # 4. Delete publisher rows from contributors. Every publisher insert above
+        # has succeeded by this point; a failure would already have raised.
+        for item in buckets["publisher"]:
+            if args.delete_events:
+                cur = conn.execute(
+                    "DELETE FROM contribution_events WHERE handle = ?",
+                    (item["handle"].lower().lstrip("@"),),
+                )
+                deleted_events += cur.rowcount
+            cur = conn.execute(
+                "DELETE FROM contributors WHERE handle = ?",
+                (item["handle"],),
+            )
+            deleted_publisher_rows += cur.rowcount
+
+        conn.commit()
+    except Exception as e:
+        conn.rollback()
+        print(f"ERROR: Transaction failed, rolled back. {e}", file=sys.stderr)
+        sys.exit(4)
+
+    print(f"  publishers inserted:          {inserted_publishers}")
+    print(f"  agents kind='agent' ensured:  {reclassified_agents}")
+    print(f"  garbage rows deleted:         {deleted_garbage}")
+    print(f"  publisher rows removed from contributors: {deleted_publisher_rows}")
+    if args.delete_events:
+        print(f"  contribution_events deleted:  {deleted_events}")
+    else:
+        print(f"  (events kept — re-run with --delete-events to clean them)")
+
+
+if __name__ == "__main__":  # script entry point; importing the module does not run main()
+    main()