Compare commits
148 commits
ganymede/p
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 926a397839 | |||
| 3fe524dd14 | |||
| 45b2f6de20 | |||
| f0f9388c1f | |||
| 0f2b153c92 | |||
| 762fd4233e | |||
| 10d5c275da | |||
| 1d6b51527a | |||
| 540ba97b9d | |||
| 58fa8c5276 | |||
| 93917f9fc2 | |||
| 3fe0f4b744 | |||
| 05d15cea56 | |||
| cfcb06a6dc | |||
| 2f6424617b | |||
| 9a943e8460 | |||
| 84f6d3682c | |||
| 33c17f87a8 | |||
| a053a8ebf9 | |||
| 97b590acd6 | |||
| 469cb7f2da | |||
| 8de28d6ee0 | |||
| 05f375d775 | |||
| 4101048cd0 | |||
| af027d3ced | |||
| 1b27a2de31 | |||
| 11e026448a | |||
| c3d0b1f5a4 | |||
| 88e8e15c6d | |||
| 5463ca0b56 | |||
| e043cf98dc | |||
| 9c0be78620 | |||
| c29049924e | |||
| f463f49b46 | |||
| 9505e5b40a | |||
| f0cf772182 | |||
| 4fc541c656 | |||
| b7242d2206 | |||
| 12078c8707 | |||
| 7a753da68b | |||
| febbc7da30 | |||
| 368b5793d3 | |||
| 670c50f384 | |||
| a479ab533b | |||
| eac5d2f0d3 | |||
| 5071ecef16 | |||
| ddf3c25e88 | |||
| cde92d3db1 | |||
| 83526bc90e | |||
| ae860a1d06 | |||
| 878f6e06e3 | |||
| ac794f5c68 | |||
| 25a537d2e1 | |||
| 0f868aefab | |||
| 13f21f7732 | |||
| 0b28c71e11 | |||
| fb121e4010 | |||
| 26a8b15f56 | |||
| 687f3d3151 | |||
| 22b6ebb6f6 | |||
| 0ce7412396 | |||
| 28b25329b3 | |||
| c763c99910 | |||
| 4c3ce265e4 | |||
| 46ad508de7 | |||
| ed1edd6466 | |||
| 53dc18afd5 | |||
| f46e14dfae | |||
| 376b77999f | |||
| 716cc43890 | |||
| c8a08023f9 | |||
| 1e0c1cd788 | |||
| 1f5eb324f3 | |||
| d073e22e8d | |||
| 552f44ec1c | |||
| e0c9951308 | |||
| 0d3fe95522 | |||
| 1755580b95 | |||
| ad7ee0831e | |||
| 10b4e27c28 | |||
| 2b58ffc765 | |||
| 50ef90e7d3 | |||
| f38b1e3c01 | |||
| ff357c4bbc | |||
| 25062cf130 | |||
| fe996c3299 | |||
| 81afcd319f | |||
| d2aec7fee3 | |||
| 681afad506 | |||
| 95f637491e | |||
| be010e666a | |||
| 84cb001dd6 | |||
| 16e798f6a2 | |||
| b091642146 | |||
| 6b3a5833df | |||
| 2253f48993 | |||
| ff68ebc561 | |||
| d89fb29c9e | |||
| 5e0cdfc63a | |||
| 9e42c34271 | |||
| f25a4093c2 | |||
| 686ef3fd7f | |||
| f43f8f923f | |||
| ad48d7384e | |||
| b92d2af1ac | |||
| e17e6c25db | |||
| 5f554bc2de | |||
| 0457c49094 | |||
| 89692fda2d | |||
| f5b27ccd73 | |||
| 47fa33fd53 | |||
| 2b49b17eb2 | |||
| 305445b164 | |||
| ae1cce730c | |||
| 4b5c5841ce | |||
| cfb80d3496 | |||
| 1dfc6dcc5c | |||
| b5aabe0364 | |||
| 0854375fd0 | |||
| 1019602eec | |||
| 66bc742979 | |||
| 0759655688 | |||
| 102d97859c | |||
| e4d7ca42ac | |||
| 02c86e9050 | |||
| 458cd7dfda | |||
| 7232755d11 | |||
| c2ff4996e3 | |||
| b3c635290f | |||
| 8ff4784fcb | |||
| a19db22b16 | |||
| bb3b033b57 | |||
| 60c92d5c19 | |||
| d33ddd9f3d | |||
| 0bedc43c94 | |||
| 2ec4c445b1 | |||
| 76f13de681 | |||
| d67d36b409 | |||
| 9267351aba | |||
| 6c6cd0d14e | |||
| e1934b30ae | |||
| a292ab75c2 | |||
| 28be7555b1 | |||
| f77fd229d6 | |||
| 089b4609d5 | |||
| 3ed0f20fa1 | |||
| 425e7a1bac | |||
| 839a6589e5 |
164 changed files with 38667 additions and 1837 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -30,3 +30,6 @@ build/
|
|||
|
||||
# OS
|
||||
.DS_Store
|
||||
|
||||
# Hermes session artifacts
|
||||
ops/sessions/
|
||||
|
|
|
|||
79
CODEOWNERS
Normal file
79
CODEOWNERS
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# teleo-infrastructure ownership map
|
||||
# Each path has ONE owning agent. Owner = accountable for correctness + reviews changes.
|
||||
# Format: <pattern> <owner>
|
||||
|
||||
# Pipeline daemon — entry points
|
||||
/teleo-pipeline.py @ship
|
||||
/reweave.py @ship
|
||||
|
||||
# Pipeline library — shared Python package
|
||||
/lib/config.py @ship
|
||||
/lib/db.py @ship
|
||||
/lib/connect.py @ship
|
||||
/lib/log.py @ship
|
||||
/lib/forgejo.py @ship
|
||||
/lib/breaker.py @ship
|
||||
/lib/worktree_lock.py @ship
|
||||
/lib/domains.py @ship
|
||||
/lib/costs.py @ship
|
||||
/lib/llm.py @ship
|
||||
/lib/merge.py @ship
|
||||
/lib/cascade.py @ship
|
||||
/lib/cross_domain.py @ship
|
||||
/lib/validate.py @ship
|
||||
/lib/stale_pr.py @ship
|
||||
/lib/watchdog.py @ship
|
||||
/lib/feedback.py @ship
|
||||
/lib/fixer.py @ship
|
||||
/lib/substantive_fixer.py @ship
|
||||
/lib/dedup.py @ship
|
||||
|
||||
/lib/extract.py @epimetheus
|
||||
/lib/extraction_prompt.py @epimetheus
|
||||
/lib/post_extract.py @epimetheus
|
||||
/lib/pre_screen.py @epimetheus
|
||||
/lib/entity_batch.py @epimetheus
|
||||
/lib/entity_queue.py @epimetheus
|
||||
|
||||
/lib/evaluate.py @leo
|
||||
/lib/analytics.py @leo
|
||||
/lib/attribution.py @leo
|
||||
|
||||
/lib/health.py @argus
|
||||
/lib/search.py @argus
|
||||
/lib/claim_index.py @argus
|
||||
/lib/digest.py @argus
|
||||
|
||||
# Diagnostics — monitoring dashboard
|
||||
/diagnostics/ @argus
|
||||
|
||||
# Telegram bot
|
||||
/telegram/ @ship
|
||||
|
||||
# Deployment automation
|
||||
/deploy/ @ship
|
||||
|
||||
# Systemd service definitions
|
||||
/systemd/ @ship
|
||||
|
||||
# Agent state management
|
||||
/agent-state/ @ship
|
||||
|
||||
# Research orchestration
|
||||
/research/ @ship
|
||||
|
||||
# Hermes agent
|
||||
/hermes-agent/ @ship
|
||||
|
||||
# One-off scripts and migrations
|
||||
/scripts/ @ship
|
||||
|
||||
# Test suite
|
||||
/tests/ @ganymede
|
||||
|
||||
# Documentation
|
||||
/docs/ shared
|
||||
|
||||
# Config
|
||||
/pyproject.toml @ship
|
||||
/.gitignore @ship
|
||||
65
README.md
Normal file
65
README.md
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# teleo-infrastructure
|
||||
|
||||
Pipeline infrastructure for the Teleo collective knowledge base. Async Python daemon that extracts, validates, evaluates, and merges claims via Forgejo PRs.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
teleo-infrastructure/
|
||||
├── teleo-pipeline.py # Daemon entry point
|
||||
├── reweave.py # Reciprocal edge maintenance
|
||||
├── lib/ # Pipeline modules (Python package)
|
||||
├── diagnostics/ # Monitoring dashboard (port 8081)
|
||||
├── telegram/ # Telegram bot interface
|
||||
├── deploy/ # Deployment + mirror scripts
|
||||
├── systemd/ # Service definitions
|
||||
├── agent-state/ # Cross-session agent state
|
||||
├── research/ # Nightly research orchestration
|
||||
├── hermes-agent/ # Hermes agent setup
|
||||
├── scripts/ # One-off backfills + migrations
|
||||
├── tests/ # Test suite
|
||||
└── docs/ # Operational documentation
|
||||
```
|
||||
|
||||
## Ownership
|
||||
|
||||
Each directory has one owning agent. The owner is accountable for correctness and reviews all changes to their section. See `CODEOWNERS` for per-file detail.
|
||||
|
||||
| Directory | Owner | What it does |
|
||||
|-----------|-------|-------------|
|
||||
| `lib/` (core) | **Ship** | Config, DB, merge, cascade, validation, LLM calls |
|
||||
| `lib/` (extraction) | **Epimetheus** | Source extraction, entity processing, pre-screening |
|
||||
| `lib/` (evaluation) | **Leo** | Claim evaluation, analytics, attribution |
|
||||
| `lib/` (health) | **Argus** | Health checks, search, claim index |
|
||||
| `diagnostics/` | **Argus** | 4-page dashboard, alerting, vitality metrics |
|
||||
| `telegram/` | **Ship** | Telegram bot, X integration, retrieval |
|
||||
| `deploy/` | **Ship** | rsync deploy, GitHub-Forgejo mirror |
|
||||
| `systemd/` | **Ship** | teleo-pipeline, teleo-diagnostics, teleo-agent@ |
|
||||
| `agent-state/` | **Ship** | Bootstrap, state library, cascade inbox processor |
|
||||
| `research/` | **Ship** | Nightly research sessions, prompt templates |
|
||||
| `scripts/` | **Ship** | Backfills, migrations, one-off maintenance |
|
||||
| `tests/` | **Ganymede** | pytest suite, integration tests |
|
||||
| `docs/` | Shared | Architecture, specs, protocols |
|
||||
|
||||
## VPS Layout
|
||||
|
||||
Runs on Hetzner CAX31 (77.42.65.182) as user `teleo`.
|
||||
|
||||
| VPS Path | Repo Source | Service |
|
||||
|----------|-------------|---------|
|
||||
| `/opt/teleo-eval/pipeline/` | `lib/`, `teleo-pipeline.py`, `reweave.py` | teleo-pipeline |
|
||||
| `/opt/teleo-eval/diagnostics/` | `diagnostics/` | teleo-diagnostics |
|
||||
| `/opt/teleo-eval/telegram/` | `telegram/` | (manual) |
|
||||
| `/opt/teleo-eval/agent-state/` | `agent-state/` | (used by research-session.sh) |
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Run tests
|
||||
pip install -e ".[dev]"
|
||||
pytest
|
||||
|
||||
# Deploy to VPS
|
||||
./deploy/deploy.sh --dry-run # preview
|
||||
./deploy/deploy.sh # deploy
|
||||
```
|
||||
255
agent-state/SCHEMA.md
Normal file
255
agent-state/SCHEMA.md
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
# Agent State Schema v1
|
||||
|
||||
File-backed durable state for teleo agents running headless on VPS.
|
||||
Survives context truncation, crash recovery, and session handoffs.
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Three formats** — JSON for structured fields, JSONL for append-only logs, Markdown for context-window-friendly content
|
||||
2. **Many small files** — selective loading, crash isolation, no locks needed
|
||||
3. **Write on events** — not timers. State updates happen when something meaningful changes.
|
||||
4. **Shared-nothing writes** — each agent owns its directory. Communication via inbox files.
|
||||
5. **State ≠ Git** — state is operational (how the agent functions). Git is output (what the agent produces).
|
||||
|
||||
## Directory Layout
|
||||
|
||||
```
|
||||
/opt/teleo-eval/agent-state/{agent}/
|
||||
├── report.json # Current status — read every wake
|
||||
├── tasks.json # Active task queue — read every wake
|
||||
├── session.json # Current/last session metadata
|
||||
├── memory.md # Accumulated cross-session knowledge (structured)
|
||||
├── inbox/ # Messages from other agents/orchestrator
|
||||
│ └── {uuid}.json # One file per message, atomic create
|
||||
├── journal.jsonl # Append-only session log
|
||||
└── metrics.json # Cumulative performance counters
|
||||
```
|
||||
|
||||
## File Specifications
|
||||
|
||||
### report.json
|
||||
|
||||
Written: after each meaningful action (session start, key finding, session end)
|
||||
Read: every wake, by orchestrator for monitoring
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": "rio",
|
||||
"updated_at": "2026-03-31T22:00:00Z",
|
||||
"status": "idle | researching | extracting | evaluating | error",
|
||||
"summary": "Completed research session — 8 sources archived on Solana launchpad mechanics",
|
||||
"current_task": null,
|
||||
"last_session": {
|
||||
"id": "20260331-220000",
|
||||
"started_at": "2026-03-31T20:30:00Z",
|
||||
"ended_at": "2026-03-31T22:00:00Z",
|
||||
"outcome": "completed | timeout | error",
|
||||
"sources_archived": 8,
|
||||
"branch": "rio/research-2026-03-31",
|
||||
"pr_number": 247
|
||||
},
|
||||
"blocked_by": null,
|
||||
"next_priority": "Follow up on conditional AMM thread from @0xfbifemboy"
|
||||
}
|
||||
```
|
||||
|
||||
### tasks.json
|
||||
|
||||
Written: when task status changes
|
||||
Read: every wake
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": "rio",
|
||||
"updated_at": "2026-03-31T22:00:00Z",
|
||||
"tasks": [
|
||||
{
|
||||
"id": "task-001",
|
||||
"type": "research | extract | evaluate | follow-up | disconfirm",
|
||||
"description": "Investigate conditional AMM mechanisms in MetaDAO v2",
|
||||
"status": "pending | active | completed | dropped",
|
||||
"priority": "high | medium | low",
|
||||
"created_at": "2026-03-31T22:00:00Z",
|
||||
"context": "Flagged in research session 2026-03-31 — @0xfbifemboy thread on conditional liquidity",
|
||||
"follow_up_from": null,
|
||||
"completed_at": null,
|
||||
"outcome": null
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### session.json
|
||||
|
||||
Written: at session start and session end
|
||||
Read: every wake (for continuation), by orchestrator for scheduling
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": "rio",
|
||||
"session_id": "20260331-220000",
|
||||
"started_at": "2026-03-31T20:30:00Z",
|
||||
"ended_at": "2026-03-31T22:00:00Z",
|
||||
"type": "research | extract | evaluate | ad-hoc",
|
||||
"domain": "internet-finance",
|
||||
"branch": "rio/research-2026-03-31",
|
||||
"status": "running | completed | timeout | error",
|
||||
"model": "sonnet",
|
||||
"timeout_seconds": 5400,
|
||||
"research_question": "How is conditional liquidity being implemented in Solana AMMs?",
|
||||
"belief_targeted": "Markets aggregate information better than votes because skin-in-the-game creates selection pressure on beliefs",
|
||||
"disconfirmation_target": "Cases where prediction markets failed to aggregate information despite financial incentives",
|
||||
"sources_archived": 8,
|
||||
"sources_expected": 10,
|
||||
"tokens_used": null,
|
||||
"cost_usd": null,
|
||||
"errors": [],
|
||||
"handoff_notes": "Found 3 sources on conditional AMM failures — needs extraction. Also flagged @metaproph3t thread for Theseus (AI governance angle)."
|
||||
}
|
||||
```
|
||||
|
||||
### memory.md
|
||||
|
||||
Written: at session end, when learning something critical
|
||||
Read: every wake (included in research prompt context)
|
||||
|
||||
```markdown
|
||||
# Rio — Operational Memory
|
||||
|
||||
## Cross-Session Patterns
|
||||
- Conditional AMMs keep appearing across 3+ independent sources (sessions 03-28, 03-29, 03-31). This is likely a real trend, not cherry-picking.
|
||||
- @0xfbifemboy consistently produces highest-signal threads in the DeFi mechanism design space.
|
||||
|
||||
## Dead Ends (don't re-investigate)
|
||||
- Polymarket fee structure analysis (2026-03-25): fully documented in existing claims, no new angles.
|
||||
- Jupiter governance token utility (2026-03-27): vaporware, no mechanism to analyze.
|
||||
|
||||
## Open Questions
|
||||
- Is MetaDAO's conditional market maker manipulation-resistant at scale? No evidence either way yet.
|
||||
- How does futarchy handle low-liquidity markets? This is the keystone weakness.
|
||||
|
||||
## Corrections
|
||||
- Previously believed Drift protocol was pure order-book. Actually hybrid AMM+CLOB. Updated 2026-03-30.
|
||||
|
||||
## Cross-Agent Flags Received
|
||||
- Theseus (2026-03-29): "Check if MetaDAO governance has AI agent participation — alignment implications"
|
||||
- Leo (2026-03-28): "Your conditional AMM analysis connects to Astra's resource allocation claims"
|
||||
```
|
||||
|
||||
### inbox/{uuid}.json
|
||||
|
||||
Written: by other agents or orchestrator
|
||||
Read: checked on wake, deleted after processing
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "msg-abc123",
|
||||
"from": "theseus",
|
||||
"to": "rio",
|
||||
"created_at": "2026-03-31T18:00:00Z",
|
||||
"type": "flag | task | question | cascade",
|
||||
"priority": "high | normal",
|
||||
"subject": "Check MetaDAO for AI agent participation",
|
||||
"body": "Found evidence that AI agents are trading on Drift — check if any are participating in MetaDAO conditional markets. Alignment implications if automated agents are influencing futarchic governance.",
|
||||
"source_ref": "theseus/research-2026-03-31",
|
||||
"expires_at": null
|
||||
}
|
||||
```
|
||||
|
||||
### journal.jsonl
|
||||
|
||||
Written: append at session boundaries
|
||||
Read: debug/audit only (never loaded into agent context by default)
|
||||
|
||||
```jsonl
|
||||
{"ts":"2026-03-31T20:30:00Z","event":"session_start","session_id":"20260331-220000","type":"research"}
|
||||
{"ts":"2026-03-31T20:35:00Z","event":"orient_complete","files_read":["identity.md","beliefs.md","reasoning.md","_map.md"]}
|
||||
{"ts":"2026-03-31T21:30:00Z","event":"sources_archived","count":5,"domain":"internet-finance"}
|
||||
{"ts":"2026-03-31T22:00:00Z","event":"session_end","outcome":"completed","sources_archived":8,"handoff":"conditional AMM failures need extraction"}
|
||||
```
|
||||
|
||||
### metrics.json
|
||||
|
||||
Written: at session end (cumulative counters)
|
||||
Read: by CI scoring system, by orchestrator for scheduling decisions
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": "rio",
|
||||
"updated_at": "2026-03-31T22:00:00Z",
|
||||
"lifetime": {
|
||||
"sessions_total": 47,
|
||||
"sessions_completed": 42,
|
||||
"sessions_timeout": 3,
|
||||
"sessions_error": 2,
|
||||
"sources_archived": 312,
|
||||
"claims_proposed": 89,
|
||||
"claims_accepted": 71,
|
||||
"claims_challenged": 12,
|
||||
"claims_rejected": 6,
|
||||
"disconfirmation_attempts": 47,
|
||||
"disconfirmation_hits": 8,
|
||||
"cross_agent_flags_sent": 23,
|
||||
"cross_agent_flags_received": 15
|
||||
},
|
||||
"rolling_30d": {
|
||||
"sessions": 12,
|
||||
"sources_archived": 87,
|
||||
"claims_proposed": 24,
|
||||
"acceptance_rate": 0.83,
|
||||
"avg_sources_per_session": 7.25
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
### research-session.sh
|
||||
|
||||
Add these hooks:
|
||||
|
||||
1. **Pre-session** (after branch creation, before Claude launch):
|
||||
- Write `session.json` with status "running"
|
||||
- Write `report.json` with status "researching"
|
||||
- Append session_start to `journal.jsonl`
|
||||
- Include `memory.md` and `tasks.json` in the research prompt
|
||||
|
||||
2. **Post-session** (after commit, before/after PR):
|
||||
- Update `session.json` with outcome, source count, branch, PR number
|
||||
- Update `report.json` with summary and next_priority
|
||||
- Update `metrics.json` counters
|
||||
- Append session_end to `journal.jsonl`
|
||||
- Process and clean `inbox/` (mark processed messages)
|
||||
|
||||
3. **On error/timeout**:
|
||||
- Update `session.json` status to "error" or "timeout"
|
||||
- Update `report.json` with error info
|
||||
- Append error event to `journal.jsonl`
|
||||
|
||||
### Pipeline daemon (teleo-pipeline.py)
|
||||
|
||||
- Read `report.json` for all agents to build dashboard
|
||||
- Write to `inbox/` when cascade events need agent attention
|
||||
- Read `metrics.json` for scheduling decisions (deprioritize agents with high error rates)
|
||||
|
||||
### Claude research prompt
|
||||
|
||||
Add to the prompt:
|
||||
```
|
||||
### Step 0: Load Operational State (1 min)
|
||||
Read /opt/teleo-eval/agent-state/{agent}/memory.md — this is your cross-session operational memory.
|
||||
Read /opt/teleo-eval/agent-state/{agent}/tasks.json — check for pending tasks.
|
||||
Check /opt/teleo-eval/agent-state/{agent}/inbox/ for messages from other agents.
|
||||
Process any high-priority inbox items before choosing your research direction.
|
||||
```
|
||||
|
||||
## Bootstrap
|
||||
|
||||
Run `ops/agent-state/bootstrap.sh` to create directories and seed initial state for all agents.
|
||||
|
||||
## Migration from Existing State
|
||||
|
||||
- `research-journal.md` continues as-is (agent-written, in git). `memory.md` is the structured equivalent for operational state (not in git).
|
||||
- `ops/sessions/*.json` continue for backward compat. `session.json` per agent is the richer replacement.
|
||||
- `ops/queue.md` remains the human-visible task board. `tasks.json` per agent is the machine-readable equivalent.
|
||||
- Workspace flags (`~/.pentagon/workspace/collective/flag-*`) migrate to `inbox/` messages over time.
|
||||
145
agent-state/bootstrap.sh
Executable file
145
agent-state/bootstrap.sh
Executable file
|
|
@ -0,0 +1,145 @@
|
|||
#!/bin/bash
# Bootstrap agent-state directories for all teleo agents.
# Run once on VPS: bash ops/agent-state/bootstrap.sh
# Safe to re-run — skips existing files, only creates missing ones.
#
# Every seed file is written to a temp file and renamed into place, so an
# interrupted run cannot leave a truncated file that later runs would skip.

set -euo pipefail

STATE_ROOT="${TELEO_STATE_ROOT:-/opt/teleo-eval/agent-state}"

# Parallel arrays: DOMAINS[i] is the domain assigned to AGENTS[i].
AGENTS=("rio" "clay" "theseus" "vida" "astra" "leo")
DOMAINS=("internet-finance" "entertainment" "ai-alignment" "health" "space-development" "grand-strategy")

log() { echo "[$(date -Iseconds)] $*"; }

# Fail fast if the two arrays drift out of sync instead of dying mid-loop
# on an unbound index under `set -u`.
if [ "${#AGENTS[@]}" -ne "${#DOMAINS[@]}" ]; then
  echo "ERROR: AGENTS (${#AGENTS[@]}) and DOMAINS (${#DOMAINS[@]}) differ in length" >&2
  exit 1
fi

# seed_file PATH — write stdin to PATH unless PATH already exists.
# The content goes to a temp file first and is renamed over PATH only after
# the write has finished.
seed_file() {
  local path="$1"
  local tmp="${path}.tmp.$$"

  if [ -f "$path" ]; then
    return 0
  fi
  cat > "$tmp"
  mv -f "$tmp" "$path"
  log " Created ${path##*/}"
}

for i in "${!AGENTS[@]}"; do
  AGENT="${AGENTS[$i]}"
  DOMAIN="${DOMAINS[$i]}"
  DIR="$STATE_ROOT/$AGENT"
  NOW="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  log "Bootstrapping $AGENT..."
  mkdir -p "$DIR/inbox"

  # report.json — current status
  seed_file "$DIR/report.json" <<EOJSON
{
  "agent": "$AGENT",
  "updated_at": "$NOW",
  "status": "idle",
  "summary": "State initialized — no sessions recorded yet.",
  "current_task": null,
  "last_session": null,
  "blocked_by": null,
  "next_priority": null
}
EOJSON

  # tasks.json — empty task queue
  seed_file "$DIR/tasks.json" <<EOJSON
{
  "agent": "$AGENT",
  "updated_at": "$NOW",
  "tasks": []
}
EOJSON

  # session.json — no session yet
  seed_file "$DIR/session.json" <<EOJSON
{
  "agent": "$AGENT",
  "session_id": null,
  "started_at": null,
  "ended_at": null,
  "type": null,
  "domain": "$DOMAIN",
  "branch": null,
  "status": "idle",
  "model": null,
  "timeout_seconds": null,
  "research_question": null,
  "belief_targeted": null,
  "disconfirmation_target": null,
  "sources_archived": 0,
  "sources_expected": 0,
  "tokens_used": null,
  "cost_usd": null,
  "errors": [],
  "handoff_notes": null
}
EOJSON

  # memory.md — empty operational memory (${AGENT^} capitalizes the name; bash 4+)
  seed_file "$DIR/memory.md" <<EOMD
# ${AGENT^} — Operational Memory

## Cross-Session Patterns
(none yet)

## Dead Ends
(none yet)

## Open Questions
(none yet)

## Corrections
(none yet)

## Cross-Agent Flags Received
(none yet)
EOMD

  # metrics.json — zero counters
  seed_file "$DIR/metrics.json" <<EOJSON
{
  "agent": "$AGENT",
  "updated_at": "$NOW",
  "lifetime": {
    "sessions_total": 0,
    "sessions_completed": 0,
    "sessions_timeout": 0,
    "sessions_error": 0,
    "sources_archived": 0,
    "claims_proposed": 0,
    "claims_accepted": 0,
    "claims_challenged": 0,
    "claims_rejected": 0,
    "disconfirmation_attempts": 0,
    "disconfirmation_hits": 0,
    "cross_agent_flags_sent": 0,
    "cross_agent_flags_received": 0
  },
  "rolling_30d": {
    "sessions": 0,
    "sources_archived": 0,
    "claims_proposed": 0,
    "acceptance_rate": 0.0,
    "avg_sources_per_session": 0.0
  }
}
EOJSON

  # journal.jsonl — log seeded with a single state_initialized event
  seed_file "$DIR/journal.jsonl" <<EOJSON
{"ts":"$NOW","event":"state_initialized","schema_version":"1.0"}
EOJSON

done

log "Bootstrap complete. State root: $STATE_ROOT"
log "Agents initialized: ${AGENTS[*]}"
|
||||
281
agent-state/lib-state.sh
Executable file
281
agent-state/lib-state.sh
Executable file
|
|
@ -0,0 +1,281 @@
|
|||
#!/bin/bash
|
||||
# lib-state.sh — Bash helpers for reading/writing agent state files.
|
||||
# Source this in pipeline scripts: source ops/agent-state/lib-state.sh
|
||||
#
|
||||
# State-file writes use atomic rename (write to .tmp, then mv) to prevent corruption; the journal is appended in place.
|
||||
# Reads return the file contents as-is, or "{}" when the file is missing; JSON validity is not checked.
|
||||
|
||||
STATE_ROOT="${TELEO_STATE_ROOT:-/opt/teleo-eval/agent-state}"
|
||||
|
||||
# --- Internal helpers ---
|
||||
|
||||
# Print the state directory path for an agent: $STATE_ROOT/<agent>.
# Pure string join — the directory is not created or checked here.
_state_dir() {
  local agent_name="$1"
  printf '%s\n' "$STATE_ROOT/$agent_name"
}
|
||||
|
||||
# --- Report (current status) ---
|
||||
|
||||
# Print an agent's report.json to stdout.
# Prints "{}" when the file does not exist or when cat fails; the content
# is passed through unparsed.
state_read_report() {
  local agent="$1"
  local report_path
  report_path="$(_state_dir "$agent")/report.json"

  if ! { [ -f "$report_path" ] && cat "$report_path"; }; then
    echo "{}"
  fi
}
|
||||
|
||||
# Update status, summary and updated_at in an agent's report.json, keeping
# every other field already in the file.
#
# Args: $1 agent, $2 status, $3 summary
# Returns non-zero, leaving the existing file untouched, if the new JSON
# cannot be produced. The JSON is captured before anything is written so a
# failing python3 can never replace the report with an empty file.
state_update_report() {
  local agent="$1"
  local status="$2"
  local summary="$3"
  local file payload
  file="$(_state_dir "$agent")/report.json"

  payload="$(
    _STATE_FILE="$file" _STATE_AGENT="$agent" _STATE_STATUS="$status" \
    _STATE_SUMMARY="$summary" _STATE_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    python3 -c "
import json, os

e = os.environ
try:
    with open(e['_STATE_FILE']) as f:
        data = json.load(f)
except (OSError, ValueError):
    # Missing, unreadable or corrupt report: start over from a minimal record.
    data = None
if not isinstance(data, dict):
    data = {'agent': e['_STATE_AGENT']}
data['status'] = e['_STATE_STATUS']
data['summary'] = e['_STATE_SUMMARY']
data['updated_at'] = e['_STATE_TS']
print(json.dumps(data, indent=2))
"
  )" || return 1

  printf '%s\n' "$payload" | _atomic_write_stdin "$file"
}
|
||||
|
||||
# Atomically replace a file with the content read from stdin.
#
# Args: $1 destination path
# The content is written to "<path>.tmp.<pid>" and then renamed over the
# destination. Empty input is rejected: callers pipe generated JSON in, so
# an empty stream means the producer failed and the existing file must not
# be replaced. On any failure the temp file is removed and 1 is returned.
_atomic_write_stdin() {
  local filepath="$1"
  local tmpfile="${filepath}.tmp.$$"

  if ! cat > "$tmpfile"; then
    rm -f "$tmpfile"
    return 1
  fi

  if [ ! -s "$tmpfile" ]; then
    rm -f "$tmpfile"
    echo "ERROR: refusing to replace $filepath with empty content" >&2
    return 1
  fi

  mv -f "$tmpfile" "$filepath"
}
|
||||
|
||||
# Rewrite report.json from scratch with the details of the session that
# just ended (called at session end).
#
# Args: $1 agent, $2 status, $3 summary, $4 session id, $5 started_at,
#       $6 ended_at, $7 outcome, $8 sources archived, $9 branch,
#       $10 PR number, $11 next priority (optional; "null" means none)
# A non-numeric sources value is stored as 0 and a non-numeric PR number
# as null. updated_at is set to the ended_at value.
state_finalize_report() {
  local agent="$1"
  local status="$2"
  local summary="$3"
  local session_id="$4"
  local started_at="$5"
  local ended_at="$6"
  local outcome="$7"
  local sources="$8"
  local branch="$9"
  local pr_number="${10}"
  local next_priority="${11:-null}"
  local report_path
  report_path="$(_state_dir "$agent")/report.json"

  # Values travel through the environment; the quoted heredoc keeps the
  # shell from expanding anything inside the Python source.
  _STATE_FILE="$report_path" _STATE_AGENT="$agent" _STATE_STATUS="$status" \
  _STATE_SUMMARY="$summary" _STATE_SESSION_ID="$session_id" \
  _STATE_STARTED="$started_at" _STATE_ENDED="$ended_at" \
  _STATE_OUTCOME="$outcome" _STATE_SOURCES="$sources" \
  _STATE_BRANCH="$branch" _STATE_PR="$pr_number" \
  _STATE_NEXT="$next_priority" \
  python3 - <<'PY' | _atomic_write_stdin "$report_path"
import json
import os

env = os.environ


def as_int(text, fallback):
    # Only plain digit strings are converted; anything else uses the fallback.
    return int(text) if text.isdigit() else fallback


ended = env['_STATE_ENDED']
next_raw = env['_STATE_NEXT']

last_session = {
    'id': env['_STATE_SESSION_ID'],
    'started_at': env['_STATE_STARTED'],
    'ended_at': ended,
    'outcome': env['_STATE_OUTCOME'],
    'sources_archived': as_int(env['_STATE_SOURCES'], 0),
    'branch': env['_STATE_BRANCH'],
    'pr_number': as_int(env['_STATE_PR'], None),
}
report = {
    'agent': env['_STATE_AGENT'],
    'updated_at': ended,
    'status': env['_STATE_STATUS'],
    'summary': env['_STATE_SUMMARY'],
    'current_task': None,
    'last_session': last_session,
    'blocked_by': None,
    'next_priority': None if next_raw == 'null' else next_raw,
}
print(json.dumps(report, indent=2))
PY
}
|
||||
|
||||
# --- Session ---
|
||||
|
||||
# Write a fresh session.json with status "running" and print the session's
# start timestamp (UTC, ISO-8601) on stdout.
#
# Args: $1 agent, $2 session id, $3 type, $4 domain, $5 branch,
#       $6 model (default "sonnet"), $7 timeout in seconds (default 5400)
# A non-numeric timeout falls back to 5400 with a warning; previously it
# crashed the JSON generator and left an empty session.json behind.
# Returns non-zero and prints nothing on stdout if the file cannot be
# written.
state_start_session() {
  local agent="$1"
  local session_id="$2"
  local type="$3"
  local domain="$4"
  local branch="$5"
  local model="${6:-sonnet}"
  local timeout="${7:-5400}"
  local started_at file payload
  started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  file="$(_state_dir "$agent")/session.json"

  case "$timeout" in
    *[!0-9]*)
      echo "WARN: invalid timeout '$timeout' for $agent; using 5400" >&2
      timeout=5400
      ;;
  esac

  # Capture the JSON first so a failing python3 never truncates the file.
  payload="$(
    _STATE_FILE="$file" _STATE_AGENT="$agent" _STATE_SID="$session_id" \
    _STATE_STARTED="$started_at" _STATE_TYPE="$type" _STATE_DOMAIN="$domain" \
    _STATE_BRANCH="$branch" _STATE_MODEL="$model" _STATE_TIMEOUT="$timeout" \
    python3 -c "
import json, os

e = os.environ
data = {
    'agent': e['_STATE_AGENT'],
    'session_id': e['_STATE_SID'],
    'started_at': e['_STATE_STARTED'],
    'ended_at': None,
    'type': e['_STATE_TYPE'],
    'domain': e['_STATE_DOMAIN'],
    'branch': e['_STATE_BRANCH'],
    'status': 'running',
    'model': e['_STATE_MODEL'],
    'timeout_seconds': int(e['_STATE_TIMEOUT']),
    'research_question': None,
    'belief_targeted': None,
    'disconfirmation_target': None,
    'sources_archived': 0,
    'sources_expected': 0,
    'tokens_used': None,
    'cost_usd': None,
    'errors': [],
    'handoff_notes': None,
}
print(json.dumps(data, indent=2))
"
  )" || return 1

  printf '%s\n' "$payload" | _atomic_write_stdin "$file" || return 1

  echo "$started_at"
}
|
||||
|
||||
# Mark the current session as finished in session.json.
#
# Args: $1 agent, $2 outcome (stored as the session status),
#       $3 sources archived (default 0), $4 PR number (default "null")
# Sets ended_at, status, sources_archived and pr_number; other fields are
# preserved. If session.json is missing or corrupt, a minimal record with
# only the agent name is used as the base. The JSON is captured before
# writing, so a failure leaves the existing file untouched and returns
# non-zero.
state_end_session() {
  local agent="$1"
  local outcome="$2"
  local sources="${3:-0}"
  local pr_number="${4:-null}"
  local file payload
  file="$(_state_dir "$agent")/session.json"

  payload="$(
    _STATE_FILE="$file" _STATE_AGENT="$agent" _STATE_OUTCOME="$outcome" \
    _STATE_SOURCES="$sources" _STATE_PR="$pr_number" \
    _STATE_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    python3 -c "
import json, os

e = os.environ
try:
    with open(e['_STATE_FILE']) as f:
        data = json.load(f)
except (OSError, ValueError):
    # No usable session record: still record how the session ended.
    data = None
if not isinstance(data, dict):
    data = {'agent': e['_STATE_AGENT']}
data['ended_at'] = e['_STATE_TS']
data['status'] = e['_STATE_OUTCOME']
sources = e['_STATE_SOURCES']
data['sources_archived'] = int(sources) if sources.isdigit() else 0
pr = e.get('_STATE_PR', 'null')
data['pr_number'] = int(pr) if pr.isdigit() else None
print(json.dumps(data, indent=2))
"
  )" || return 1

  printf '%s\n' "$payload" | _atomic_write_stdin "$file"
}
|
||||
|
||||
# --- Journal (append-only JSONL) ---
|
||||
|
||||
# Append one JSON line to an agent's journal.jsonl.
#
# Args: $1 agent, $2 event name, then any number of key=value pairs that
# become extra fields. Values are stored as strings; a pair with an empty
# key is ignored, and a later pair overrides an earlier field of the same
# name (including ts and event).
state_journal_append() {
  local agent="$1"
  local event="$2"
  shift 2
  local journal_path
  journal_path="$(_state_dir "$agent")/journal.jsonl"

  # Remaining positional args reach Python as sys.argv[1:].
  _STATE_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)" _STATE_EVT="$event" \
  python3 - "$@" >> "$journal_path" <<'PY'
import json
import os
import sys

record = {'ts': os.environ['_STATE_TS'], 'event': os.environ['_STATE_EVT']}
pairs = (arg.partition('=') for arg in sys.argv[1:])
record.update((key, value) for key, _sep, value in pairs if key)
print(json.dumps(record))
PY
}
|
||||
|
||||
# --- Metrics ---
|
||||
|
||||
# Bump the lifetime counters in an agent's metrics.json after a session.
#
# Args: $1 agent, $2 outcome ("completed", "timeout" or "error"; any other
#       value only bumps sessions_total), $3 sources archived (default 0;
#       non-numeric counts as 0)
# Only the "lifetime" section and updated_at are changed; rolling_30d is
# left as found. A missing or corrupt file is replaced by fresh counters.
# The JSON is captured before writing, so a failure leaves the existing
# file untouched and returns non-zero.
state_update_metrics() {
  local agent="$1"
  local outcome="$2"
  local sources="${3:-0}"
  local file payload
  file="$(_state_dir "$agent")/metrics.json"

  payload="$(
    _STATE_FILE="$file" _STATE_AGENT="$agent" _STATE_OUTCOME="$outcome" \
    _STATE_SOURCES="$sources" _STATE_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    python3 -c "
import json, os

e = os.environ
try:
    with open(e['_STATE_FILE']) as f:
        data = json.load(f)
except (OSError, ValueError):
    data = None
if not isinstance(data, dict):
    data = {'agent': e['_STATE_AGENT'], 'lifetime': {}, 'rolling_30d': {}}

lt = data.setdefault('lifetime', {})
lt['sessions_total'] = lt.get('sessions_total', 0) + 1

counter_by_outcome = {
    'completed': 'sessions_completed',
    'timeout': 'sessions_timeout',
    'error': 'sessions_error',
}
counter = counter_by_outcome.get(e['_STATE_OUTCOME'])
if counter is not None:
    lt[counter] = lt.get(counter, 0) + 1

sources = e['_STATE_SOURCES']
lt['sources_archived'] = lt.get('sources_archived', 0) + (int(sources) if sources.isdigit() else 0)

data['updated_at'] = e['_STATE_TS']
print(json.dumps(data, indent=2))
"
  )" || return 1

  printf '%s\n' "$payload" | _atomic_write_stdin "$file"
}
|
||||
|
||||
# --- Inbox ---
|
||||
|
||||
# List the *.json message files in an agent's inbox via ls.
# Prints nothing when the inbox directory is missing or holds no .json
# files; always returns 0.
state_check_inbox() {
  local agent="$1"
  local inbox_dir
  inbox_dir="$(_state_dir "$agent")/inbox"

  [ -d "$inbox_dir" ] || return 0
  # An unmatched glob makes ls fail; that error is deliberately silenced.
  ls "$inbox_dir"/*.json 2>/dev/null || true
}
|
||||
|
||||
# Drop a message file into another agent's inbox and print its id.
#
# Args: $1 from, $2 to, $3 type, $4 subject, $5 body
# The message is written as <inbox>/<msg_id>.json with priority "normal"
# and null source_ref / expires_at. The id is built from the epoch second,
# the shell PID and $RANDOM, and is regenerated while a file of that name
# already exists. The earlier "msg-<epoch>-<pid>" form repeated when one
# process sent two messages within the same second, so the second message
# overwrote the first.
state_send_message() {
  local from="$1"
  local to="$2"
  local type="$3"
  local subject="$4"
  local body="$5"
  local inbox msg_id file
  inbox="$(_state_dir "$to")/inbox"

  mkdir -p "$inbox"

  while :; do
    msg_id="msg-$(date +%s)-$$-${RANDOM}"
    file="$inbox/${msg_id}.json"
    [ -e "$file" ] || break
  done

  _STATE_MSGID="$msg_id" _STATE_FROM="$from" \
  _STATE_TO="$to" _STATE_TS="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  _STATE_TYPE="$type" _STATE_SUBJECT="$subject" _STATE_BODY="$body" \
  python3 -c "
import json, os

e = os.environ
data = {
    'id': e['_STATE_MSGID'],
    'from': e['_STATE_FROM'],
    'to': e['_STATE_TO'],
    'created_at': e['_STATE_TS'],
    'type': e['_STATE_TYPE'],
    'priority': 'normal',
    'subject': e['_STATE_SUBJECT'],
    'body': e['_STATE_BODY'],
    'source_ref': None,
    'expires_at': None,
}
print(json.dumps(data, indent=2))
" | _atomic_write_stdin "$file"

  echo "$msg_id"
}
|
||||
|
||||
# --- State directory check ---
|
||||
|
||||
# Verify that an agent's state directory exists.
#   $1  agent name (resolved to a state directory by _state_dir)
# Returns 0 when the directory is present; otherwise prints an error to
# stderr and returns 1. The directory is never created here.
state_ensure_dir() {
    local agent="$1"
    local state_dir="$(_state_dir "$agent")"
    [ -d "$state_dir" ] && return 0
    echo "ERROR: Agent state not initialized for $agent. Run bootstrap.sh first." >&2
    return 1
}
|
||||
113
agent-state/process-cascade-inbox.py
Normal file
113
agent-state/process-cascade-inbox.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Process cascade inbox messages after a research session.
|
||||
|
||||
For each unread cascade-*.md in an agent's inbox:
|
||||
1. Logs cascade_reviewed event to pipeline.db audit_log
|
||||
2. Moves the file to inbox/processed/
|
||||
|
||||
Usage: python3 process-cascade-inbox.py <agent-name>
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sqlite3
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
AGENT_STATE_DIR = Path(os.environ.get("AGENT_STATE_DIR", "/opt/teleo-eval/agent-state"))
|
||||
PIPELINE_DB = Path(os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db"))
|
||||
|
||||
|
||||
def parse_frontmatter(text: str) -> dict:
    """Parse YAML-like frontmatter from markdown.

    The frontmatter is the block between a leading ``---`` line and the next
    ``---``. Each ``key: value`` line becomes one entry; the line is split on
    the first colon only, so values may themselves contain colons. Keys and
    values are whitespace-stripped and surrounding double quotes are removed
    from values. Lines without a colon are ignored. This is a flat line
    parser, not a YAML parser: nested or multi-line values are not
    interpreted.

    Both LF and CRLF line endings are accepted, so a file written on Windows
    still yields its fields.

    Args:
        text: Full markdown document.

    Returns:
        Mapping of frontmatter keys to string values; empty when the text
        does not start with a frontmatter block.
    """
    fm = {}
    # \r?\n on both delimiters: with a bare \n, a CRLF document never matched
    # and every field (including the "status" skip marker) was lost.
    match = re.match(r'^---\r?\n(.*?)\r?\n---', text, re.DOTALL)
    if not match:
        return fm
    for line in match.group(1).strip().splitlines():
        key, sep, val = line.partition(':')
        if sep:
            fm[key.strip()] = val.strip().strip('"')
    return fm
|
||||
|
||||
|
||||
def process_agent_inbox(agent: str) -> int:
    """Process cascade messages in agent's inbox. Returns count processed.

    For every ``cascade-*.md`` file in ``AGENT_STATE_DIR/<agent>/inbox`` whose
    frontmatter is not already ``status: processed``:

    1. insert a ``cascade_reviewed`` row into ``audit_log`` in PIPELINE_DB
       (skipped when the database cannot be opened),
    2. move the file into ``inbox/processed/``,
    3. commit the audit row.

    The audit row is committed per file and rolled back if the move fails, so
    a file that stays in the inbox never leaves an audit entry behind that
    would be duplicated on the next run.

    Failures are reported on stderr and never raised: one bad file does not
    stop the others, and an unavailable database does not stop files from
    being moved.

    Args:
        agent: Agent name, used as the directory name under AGENT_STATE_DIR.

    Returns:
        Number of files moved to ``processed/``; 0 when the inbox is missing
        or contains no cascade files.
    """
    inbox_dir = AGENT_STATE_DIR / agent / "inbox"
    if not inbox_dir.exists():
        return 0

    cascade_files = sorted(inbox_dir.glob("cascade-*.md"))
    if not cascade_files:
        return 0

    # Ensure processed dir exists
    processed_dir = inbox_dir / "processed"
    processed_dir.mkdir(exist_ok=True)

    processed = 0
    now = datetime.now(timezone.utc).isoformat()

    conn = None
    try:
        conn = sqlite3.connect(str(PIPELINE_DB), timeout=10)
        conn.execute("PRAGMA journal_mode=WAL")
    except sqlite3.Error as e:
        print(f"WARNING: Cannot connect to pipeline.db: {e}", file=sys.stderr)
        # Still move files even if DB is unavailable. Close the handle if
        # connect() succeeded but the PRAGMA failed, so it is not leaked.
        if conn is not None:
            conn.close()
        conn = None

    try:
        for cf in cascade_files:
            try:
                text = cf.read_text(encoding="utf-8")
                fm = parse_frontmatter(text)

                # Skip already-processed files
                if fm.get("status") == "processed":
                    continue

                # Log to audit_log (left uncommitted until the move succeeds)
                if conn is not None:
                    detail = {
                        "agent": agent,
                        "cascade_file": cf.name,
                        "subject": fm.get("subject", "unknown"),
                        "original_created": fm.get("created", "unknown"),
                        "reviewed_at": now,
                    }
                    conn.execute(
                        "INSERT INTO audit_log (stage, event, detail, timestamp) VALUES (?, ?, ?, ?)",
                        ("cascade", "cascade_reviewed", json.dumps(detail), now),
                    )

                # Move to processed
                dest = processed_dir / cf.name
                shutil.move(str(cf), str(dest))
                processed += 1

                if conn is not None:
                    try:
                        conn.commit()
                    except sqlite3.Error as e:
                        # The file is already moved; surface the lost audit
                        # entry instead of silently dropping it.
                        print(
                            f"WARNING: Moved {cf.name} but could not commit audit_log entry: {e}",
                            file=sys.stderr,
                        )

            except Exception as e:
                print(f"WARNING: Failed to process {cf.name}: {e}", file=sys.stderr)
                # Discard the pending audit row for this file so it is not
                # committed together with a later file.
                if conn is not None:
                    try:
                        conn.rollback()
                    except sqlite3.Error as rollback_err:
                        print(
                            f"WARNING: Rollback failed after {cf.name}: {rollback_err}",
                            file=sys.stderr,
                        )
    finally:
        if conn is not None:
            conn.close()

    return processed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one positional argument is required: the agent name.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(f"Usage: {sys.argv[0]} <agent-name>", file=sys.stderr)
        sys.exit(1)

    agent = cli_args[0]
    count = process_agent_inbox(agent)
    # Stay quiet when there was nothing to do.
    if count > 0:
        print(f"Processed {count} cascade message(s) for {agent}")

    # Exit 0 regardless — non-fatal
    sys.exit(0)
|
||||
|
|
@ -1,175 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Batch extract sources from inbox/queue/ — v3 with two-gate skip logic
|
||||
#
|
||||
# Uses separate extract/ worktree (not main/ — prevents daemon race condition).
|
||||
# Skip logic uses two checks instead of local marker files (Ganymede v3 review):
|
||||
# Gate 1: Is source already in archive/{domain}/? → already processed, dedup
|
||||
# Gate 2: Does extraction branch exist on Forgejo? → extraction in progress
|
||||
# Neither → extract
|
||||
#
|
||||
# Architecture: Ganymede (two-gate) + Rhea (separate worktrees)
|
||||
|
||||
REPO=/opt/teleo-eval/workspaces/extract
|
||||
MAIN_REPO=/opt/teleo-eval/workspaces/main
|
||||
EXTRACT=/opt/teleo-eval/openrouter-extract-v2.py
|
||||
CLEANUP=/opt/teleo-eval/post-extract-cleanup.py
|
||||
LOG=/opt/teleo-eval/logs/batch-extract-50.log
|
||||
TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-leo-token)
|
||||
FORGEJO_URL="http://localhost:3000"
|
||||
MAX=50
|
||||
COUNT=0
|
||||
SUCCESS=0
|
||||
FAILED=0
|
||||
SKIPPED=0
|
||||
|
||||
# Lockfile to prevent concurrent runs
|
||||
LOCKFILE="/tmp/batch-extract.lock"
|
||||
if [ -f "$LOCKFILE" ]; then
|
||||
pid=$(cat "$LOCKFILE" 2>/dev/null)
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
echo "[$(date)] SKIP: batch extract already running (pid $pid)" >> $LOG
|
||||
exit 0
|
||||
fi
|
||||
rm -f "$LOCKFILE"
|
||||
fi
|
||||
echo $$ > "$LOCKFILE"
|
||||
trap 'rm -f "$LOCKFILE"' EXIT
|
||||
|
||||
echo "[$(date)] Starting batch extraction of $MAX sources" >> $LOG
|
||||
|
||||
cd $REPO || exit 1
|
||||
git fetch origin main 2>/dev/null
|
||||
git checkout -f main 2>/dev/null
|
||||
git reset --hard origin/main 2>/dev/null
|
||||
|
||||
# Pre-extraction cleanup: remove queue files that already exist in archive
|
||||
# This runs on the MAIN worktree (not extract/) so deletions are committed to git.
|
||||
# Prevents the "queue duplicate reappears after reset --hard" problem.
|
||||
CLEANED=0
|
||||
for qfile in $MAIN_REPO/inbox/queue/*.md; do
|
||||
[ -f "$qfile" ] || continue
|
||||
qbase=$(basename "$qfile")
|
||||
if find "$MAIN_REPO/inbox/archive" -name "$qbase" 2>/dev/null | grep -q .; then
|
||||
rm -f "$qfile"
|
||||
CLEANED=$((CLEANED + 1))
|
||||
fi
|
||||
done
|
||||
if [ "$CLEANED" -gt 0 ]; then
|
||||
echo "[$(date)] Cleaned $CLEANED stale queue duplicates" >> $LOG
|
||||
cd $MAIN_REPO
|
||||
git add -A inbox/queue/ 2>/dev/null
|
||||
git commit -m "pipeline: clean $CLEANED stale queue duplicates
|
||||
|
||||
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" 2>/dev/null
|
||||
# Push with retry
|
||||
for attempt in 1 2 3; do
|
||||
git pull --rebase origin main 2>/dev/null
|
||||
git push origin main 2>/dev/null && break
|
||||
sleep 2
|
||||
done
|
||||
cd $REPO
|
||||
git fetch origin main 2>/dev/null
|
||||
git reset --hard origin/main 2>/dev/null
|
||||
fi
|
||||
|
||||
# Get sources in queue
|
||||
SOURCES=$(ls inbox/queue/*.md 2>/dev/null | head -$MAX)
|
||||
|
||||
# Batch fetch all remote branches once (Ganymede: 1 call instead of 84)
|
||||
REMOTE_BRANCHES=$(git ls-remote --heads origin 2>/dev/null)
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "[$(date)] ABORT: git ls-remote failed — remote unreachable, skipping cycle" >> $LOG
|
||||
exit 0
|
||||
fi
|
||||
|
||||
for SOURCE in $SOURCES; do
|
||||
COUNT=$((COUNT + 1))
|
||||
BASENAME=$(basename "$SOURCE" .md)
|
||||
BRANCH="extract/$BASENAME"
|
||||
|
||||
# Gate 1: Already in archive? Source was already processed — dedup (Ganymede)
|
||||
if find "$MAIN_REPO/inbox/archive" -name "$BASENAME.md" 2>/dev/null | grep -q .; then
|
||||
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (already in archive)" >> $LOG
|
||||
# Delete the queue duplicate
|
||||
rm -f "$MAIN_REPO/inbox/queue/$BASENAME.md" 2>/dev/null
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
continue
|
||||
fi
|
||||
|
||||
# Gate 2: Branch exists on Forgejo? Extraction already in progress (cached lookup)
|
||||
if echo "$REMOTE_BRANCHES" | grep -q "refs/heads/$BRANCH$"; then
|
||||
echo "[$(date)] [$COUNT/$MAX] SKIP $BASENAME (branch exists — in progress)" >> $LOG
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "[$(date)] [$COUNT/$MAX] Processing $BASENAME" >> $LOG
|
||||
|
||||
# Reset to main
|
||||
git checkout -f main 2>/dev/null
|
||||
git fetch origin main 2>/dev/null
|
||||
git reset --hard origin/main 2>/dev/null
|
||||
|
||||
# Clean stale remote branch (Leo's catch — prevents checkout conflicts)
|
||||
git push origin --delete "$BRANCH" 2>/dev/null
|
||||
|
||||
# Create fresh branch
|
||||
git branch -D "$BRANCH" 2>/dev/null
|
||||
git checkout -b "$BRANCH" 2>/dev/null
|
||||
if [ $? -ne 0 ]; then
|
||||
echo " -> SKIP (branch creation failed)" >> $LOG
|
||||
SKIPPED=$((SKIPPED + 1))
|
||||
continue
|
||||
fi
|
||||
|
||||
# Run extraction
|
||||
python3 $EXTRACT "$SOURCE" --no-review >> $LOG 2>&1
|
||||
EXTRACT_RC=$?
|
||||
|
||||
|
||||
|
||||
if [ $EXTRACT_RC -ne 0 ]; then
|
||||
FAILED=$((FAILED + 1))
|
||||
echo " -> FAILED (extract rc=$EXTRACT_RC)" >> $LOG
|
||||
continue
|
||||
fi
|
||||
|
||||
# Post-extraction cleanup
|
||||
python3 $CLEANUP $REPO >> $LOG 2>&1
|
||||
|
||||
# Check if any files were created/modified
|
||||
CHANGED=$(git status --porcelain | wc -l | tr -d " ")
|
||||
if [ "$CHANGED" -eq 0 ]; then
|
||||
echo " -> No changes (enrichment/null-result only)" >> $LOG
|
||||
continue
|
||||
fi
|
||||
|
||||
# Commit
|
||||
git add -A
|
||||
git commit -m "extract: $BASENAME
|
||||
|
||||
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>" >> $LOG 2>&1
|
||||
|
||||
# Push
|
||||
git push "http://leo:${TOKEN}@localhost:3000/teleo/teleo-codex.git" "$BRANCH" --force >> $LOG 2>&1
|
||||
|
||||
# Create PR
|
||||
curl -sf -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \
|
||||
-H "Authorization: token $TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"title\":\"extract: $BASENAME\",\"head\":\"$BRANCH\",\"base\":\"main\"}" >> /dev/null 2>&1
|
||||
|
||||
SUCCESS=$((SUCCESS + 1))
|
||||
echo " -> SUCCESS ($CHANGED files)" >> $LOG
|
||||
|
||||
# Back to main
|
||||
git checkout -f main 2>/dev/null
|
||||
|
||||
# Rate limit
|
||||
sleep 2
|
||||
done
|
||||
|
||||
echo "[$(date)] Batch complete: $SUCCESS success, $FAILED failed, $SKIPPED skipped (already attempted)" >> $LOG
|
||||
|
||||
git checkout -f main 2>/dev/null
|
||||
git reset --hard origin/main 2>/dev/null
|
||||
56
deploy.sh
56
deploy.sh
|
|
@ -1,56 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# Deploy teleo-pipeline to VPS.
|
||||
# Usage: ./deploy.sh [--restart]
|
||||
#
|
||||
# Pulls latest from current branch, updates venv, optionally restarts service.
|
||||
# Run from the VPS as the teleo user, or via SSH:
|
||||
# ssh teleo@77.42.65.182 'cd /opt/teleo-eval/pipeline && ./deploy.sh --restart'
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
DEPLOY_DIR="/opt/teleo-eval/pipeline"
|
||||
VENV_DIR="${DEPLOY_DIR}/.venv"
|
||||
SERVICE="teleo-pipeline"
|
||||
|
||||
cd "$DEPLOY_DIR"
|
||||
|
||||
echo "=== Pulling latest ==="
|
||||
git pull --ff-only
|
||||
|
||||
echo "=== Updating venv ==="
|
||||
"${VENV_DIR}/bin/pip" install -q -e ".[dev]" 2>/dev/null || \
|
||||
"${VENV_DIR}/bin/pip" install -q -e .
|
||||
|
||||
echo "=== Syntax check ==="
|
||||
"${VENV_DIR}/bin/python3" -c "
|
||||
import ast, pathlib, sys
|
||||
errors = []
|
||||
for f in pathlib.Path('.').rglob('*.py'):
|
||||
if '.venv' in str(f):
|
||||
continue
|
||||
try:
|
||||
ast.parse(f.read_text())
|
||||
except SyntaxError as e:
|
||||
errors.append(f'{f}: {e}')
|
||||
if errors:
|
||||
for e in errors:
|
||||
print(f'SYNTAX ERROR: {e}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print('All Python files pass syntax check')
|
||||
"
|
||||
|
||||
if [[ "${1:-}" == "--restart" ]]; then
|
||||
echo "=== Restarting ${SERVICE} ==="
|
||||
sudo systemctl restart "$SERVICE"
|
||||
sleep 2
|
||||
if systemctl is-active --quiet "$SERVICE"; then
|
||||
echo "=== ${SERVICE} is running ==="
|
||||
systemctl status "$SERVICE" --no-pager -l | head -15
|
||||
else
|
||||
echo "ERROR: ${SERVICE} failed to start" >&2
|
||||
journalctl -u "$SERVICE" --no-pager -n 20
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "=== Deploy complete (service not restarted — use --restart to restart) ==="
|
||||
fi
|
||||
144
deploy/auto-deploy.sh
Executable file
144
deploy/auto-deploy.sh
Executable file
|
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/env bash
# auto-deploy.sh — Pull from Forgejo, sync to working dirs, restart if needed.
# Runs as systemd timer (teleo-auto-deploy.timer) every 2 minutes.
# Exits silently when nothing has changed.
#
# Flow: take lock -> fetch origin/main -> compare with last deployed SHA ->
# fast-forward the checkout -> syntax-check Python -> rsync into the live
# directories -> restart services whose Python changed -> smoke test ->
# record the new SHA. The stamp is only written after a clean run, so a
# failed deploy is retried on the next timer tick.
set -euo pipefail

# Non-blocking flock on fd 9: a second invocation exits instead of queueing.
LOCK_FILE="/tmp/teleo-auto-deploy.lock"
exec 9>"$LOCK_FILE"
if ! flock -n 9; then
    logger -t "auto-deploy" "Another deploy is already running. Skipping."
    exit 0
fi

DEPLOY_CHECKOUT="/opt/teleo-eval/workspaces/deploy-infra"
PIPELINE_DIR="/opt/teleo-eval/pipeline"
DIAGNOSTICS_DIR="/opt/teleo-eval/diagnostics"
AGENT_STATE_DIR="/opt/teleo-eval/ops/agent-state"
STAMP_FILE="/opt/teleo-eval/.last-deploy-sha"
LOG_TAG="auto-deploy"

# Log to syslog (via logger) and to stdout with a local timestamp.
log() { logger -t "$LOG_TAG" "$1"; echo "$(date '+%Y-%m-%d %H:%M:%S') $1"; }

# Print the .py paths that differ between OLD_SHA and NEW_SHA under the given
# pathspecs. grep consumes the whole stream (no -q), so git never receives
# SIGPIPE, which `set -o pipefail` would report as a failed pipeline.
changed_py() {
    git diff --name-only "$OLD_SHA" "$NEW_SHA" -- "$@" 2>/dev/null | grep '\.py$' || true
}

if [ ! -d "$DEPLOY_CHECKOUT/.git" ]; then
    log "ERROR: Deploy checkout not found at $DEPLOY_CHECKOUT. Run setup first."
    exit 1
fi

cd "$DEPLOY_CHECKOUT"
if ! git fetch origin main --quiet 2>&1; then
    log "ERROR: git fetch failed"
    exit 1
fi

NEW_SHA=$(git rev-parse origin/main)
OLD_SHA=$(cat "$STAMP_FILE" 2>/dev/null || echo "none")

# Nothing new since the last successful deploy.
if [ "$NEW_SHA" = "$OLD_SHA" ]; then
    exit 0
fi

log "New commits: ${OLD_SHA:0:8} -> ${NEW_SHA:0:8}"

if ! git checkout main --quiet 2>&1; then
    log "ERROR: git checkout main failed — dirty tree or corrupted index"
    exit 1
fi
if ! git pull --ff-only --quiet 2>&1; then
    log "ERROR: git pull --ff-only failed. Manual intervention needed."
    exit 1
fi

# Syntax check all Python files before copying.
# agent-state/ is included because it is rsynced to a live directory below.
ERRORS=0
for f in lib/*.py *.py diagnostics/*.py telegram/*.py tests/*.py agent-state/*.py; do
    [ -f "$f" ] || continue
    if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f" 2>&1; then
        log "SYNTAX ERROR: $f"
        ERRORS=$((ERRORS + 1))
    fi
done
if [ "$ERRORS" -gt 0 ]; then
    log "ERROR: $ERRORS syntax errors. Deploy aborted. Fix and push again."
    exit 1
fi
log "Syntax check passed"

# Sync to working directories
RSYNC_OPTS=(-az --exclude __pycache__ --exclude '*.pyc' --exclude '*.bak*')

rsync "${RSYNC_OPTS[@]}" lib/ "$PIPELINE_DIR/lib/"

for f in teleo-pipeline.py reweave.py fetch_coins.py; do
    [ -f "$f" ] && rsync "${RSYNC_OPTS[@]}" "$f" "$PIPELINE_DIR/$f"
done

rsync "${RSYNC_OPTS[@]}" telegram/ "$PIPELINE_DIR/telegram/"
rsync "${RSYNC_OPTS[@]}" diagnostics/ "$DIAGNOSTICS_DIR/"
rsync "${RSYNC_OPTS[@]}" agent-state/ "$AGENT_STATE_DIR/"
rsync "${RSYNC_OPTS[@]}" tests/ "$PIPELINE_DIR/tests/"
[ -f research/research-session.sh ] && rsync "${RSYNC_OPTS[@]}" research/research-session.sh /opt/teleo-eval/research-session.sh

# Safety net: ensure all .sh files are executable after rsync
find /opt/teleo-eval -maxdepth 3 -name '*.sh' -not -perm -u+x -exec chmod +x {} +

log "Files synced"

# Restart services only if Python files changed.
# If there is no usable previous SHA (first deploy, or the stamped commit is
# no longer in this checkout) the diff cannot be computed, so restart
# everything rather than leave old code running against new files.
RESTART=""
if [ "$OLD_SHA" != "none" ] && git cat-file -e "${OLD_SHA}^{commit}" 2>/dev/null; then
    if [ -n "$(changed_py lib/ teleo-pipeline.py reweave.py telegram/)" ]; then
        RESTART="$RESTART teleo-pipeline"
    fi
    if [ -n "$(changed_py diagnostics/)" ]; then
        RESTART="$RESTART teleo-diagnostics"
    fi
else
    [ "$OLD_SHA" = "none" ] || log "WARNING: previous deploy SHA ${OLD_SHA:0:8} not found in checkout — restarting all services"
    RESTART="teleo-pipeline teleo-diagnostics"
fi

if [ -n "$RESTART" ]; then
    log "Restarting:$RESTART"
    # $RESTART is deliberately unquoted: it is a space-separated unit list.
    sudo systemctl restart $RESTART
    sleep 30

    FAIL=0
    for svc in $RESTART; do
        if systemctl is-active --quiet "$svc"; then
            log "$svc: active"
        else
            log "ERROR: $svc failed to start"
            journalctl -u "$svc" -n 5 --no-pager 2>/dev/null || true
            FAIL=1
        fi
    done

    if [[ "$RESTART" == *teleo-pipeline* ]]; then
        # curl's -w already prints 000 when the connection fails; only fall
        # back to 000 when it printed nothing at all.
        HEALTH_CODE=$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 3 http://localhost:8080/health 2>/dev/null || true)
        HEALTH_CODE="${HEALTH_CODE:-000}"
        # Both 200 and 503 count as "the HTTP endpoint is answering".
        if [ "$HEALTH_CODE" = "200" ] || [ "$HEALTH_CODE" = "503" ]; then
            log "pipeline health: OK (HTTP $HEALTH_CODE)"
        else
            log "WARNING: pipeline health check failed (HTTP $HEALTH_CODE)"
            FAIL=1
        fi
    fi

    if [[ "$RESTART" == *teleo-diagnostics* ]]; then
        if curl -sf --connect-timeout 3 http://localhost:8081/ops > /dev/null 2>&1; then
            log "diagnostics health: OK"
        else
            log "WARNING: diagnostics health check failed"
            FAIL=1
        fi
    fi

    if [ "$FAIL" -gt 0 ]; then
        log "WARNING: Smoke test failures. NOT updating stamp. Will retry next cycle. Push a fix."
        exit 1
    fi
else
    log "No Python changes — services not restarted"
fi

echo "$NEW_SHA" > "$STAMP_FILE"
log "Deploy complete: $(git log --oneline -1 "$NEW_SHA")"
|
||||
107
deploy/deploy.sh
Executable file
107
deploy/deploy.sh
Executable file
|
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/env bash
# deploy.sh — Deploy pipeline and diagnostics to VPS from repo
# Usage: ./deploy.sh [--dry-run] [--restart]
#
# Requires: committed, clean working tree. Enforces repo-first workflow.
#
# Steps: parse flags -> refuse a dirty tree -> syntax-check every Python file
# that will be shipped -> rsync each component to the VPS -> optionally
# restart the services over ssh.
set -euo pipefail

VPS_HOST="teleo@77.42.65.182"
VPS_PIPELINE="/opt/teleo-eval/pipeline"
VPS_DIAGNOSTICS="/opt/teleo-eval/diagnostics"
VPS_AGENT_STATE="/opt/teleo-eval/ops/agent-state"
# Repo root is the parent of the directory containing this script.
REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

DRY_RUN=false
RESTART=false

for arg in "$@"; do
    case "$arg" in
        --dry-run) DRY_RUN=true ;;
        --restart) RESTART=true ;;
        --help|-h)
            echo "Usage: $0 [--dry-run] [--restart]"
            echo "  --dry-run  Show what would be deployed without doing it"
            echo "  --restart  Restart services after deploy"
            exit 0
            ;;
        *) echo "Unknown arg: $arg" >&2; exit 1 ;;
    esac
done

# Gate: working tree must be clean
if [ -n "$(git -C "$REPO_ROOT" status --porcelain)" ]; then
    echo "ERROR: Uncommitted changes. Commit first, deploy second."
    git -C "$REPO_ROOT" status --short
    exit 1
fi

echo "Deploying from commit: $(git -C "$REPO_ROOT" log --oneline -1)"
echo ""

# Syntax check all Python files before deploying.
# Covers every directory that is rsynced below, including agent-state/ and
# tests/. The parser's own message is left on stderr so the failing line is
# visible, not just the file name.
echo "=== Pre-deploy syntax check ==="
ERRORS=0
for f in "$REPO_ROOT/lib/"*.py "$REPO_ROOT/"*.py "$REPO_ROOT/diagnostics/"*.py \
         "$REPO_ROOT/telegram/"*.py "$REPO_ROOT/tests/"*.py "$REPO_ROOT/agent-state/"*.py; do
    [ -f "$f" ] || continue
    if ! python3 -c "import ast, sys; ast.parse(open(sys.argv[1]).read())" "$f"; then
        echo "SYNTAX ERROR: $f"
        ERRORS=$((ERRORS + 1))
    fi
done
if [ "$ERRORS" -gt 0 ]; then
    echo "ERROR: $ERRORS files have syntax errors. Fix before deploying."
    exit 1
fi
echo "All files pass syntax check."
echo ""

RSYNC_OPTS=(-avz --exclude __pycache__ --exclude '*.pyc' --exclude '*.bak*')
if $DRY_RUN; then
    RSYNC_OPTS+=(--dry-run)
    echo "=== DRY RUN ==="
fi

echo "=== Pipeline lib/ ==="
rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/lib/" "$VPS_HOST:$VPS_PIPELINE/lib/"
echo ""

echo "=== Pipeline top-level ==="
for f in teleo-pipeline.py reweave.py fetch_coins.py; do
    [ -f "$REPO_ROOT/$f" ] || continue
    rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/$f" "$VPS_HOST:$VPS_PIPELINE/$f"
done
echo ""

echo "=== Telegram bot ==="
rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/telegram/" "$VPS_HOST:$VPS_PIPELINE/telegram/"
echo ""

echo "=== Tests ==="
rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/tests/" "$VPS_HOST:$VPS_PIPELINE/tests/"
echo ""

echo "=== Diagnostics ==="
rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/diagnostics/" "$VPS_HOST:$VPS_DIAGNOSTICS/"
echo ""

echo "=== Agent state ==="
rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/agent-state/" "$VPS_HOST:$VPS_AGENT_STATE/"
echo ""

echo "=== Research session ==="
# Guarded: under `set -e` a missing file would otherwise abort here, after
# every other component has already been pushed.
if [ -f "$REPO_ROOT/research/research-session.sh" ]; then
    rsync "${RSYNC_OPTS[@]}" "$REPO_ROOT/research/research-session.sh" "$VPS_HOST:/opt/teleo-eval/research-session.sh"
else
    echo "research/research-session.sh not found — skipped"
fi
echo ""

if $DRY_RUN; then
    echo "Dry run complete. No changes made."
    exit 0
fi

echo "Deploy complete."

if $RESTART; then
    echo ""
    echo "=== Restarting services ==="
    ssh "$VPS_HOST" "sudo systemctl restart teleo-pipeline teleo-diagnostics"
    echo "Services restarted."
fi
|
||||
10
deploy/fix-ownership.sh
Executable file
10
deploy/fix-ownership.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
# Fix root-owned files before pipeline starts (3rd incident — Rhea, Epimetheus)
# Any git op running as root poisons ownership. This catches it at startup.
#
# Every path under the listed trees that is not owned by user teleo is handed
# back to teleo:teleo. Errors (e.g. a tree that does not exist) are
# suppressed; the script is best-effort and always exits 0.
#
# chown -h changes a symlink itself instead of following it. Without -h, a
# root-owned symlink inside a checkout would cause the file it points to —
# possibly outside these trees — to be re-owned.
for tree in \
    /opt/teleo-eval/workspaces \
    /opt/teleo-eval/pipeline \
    /opt/teleo-eval/entity-queue \
    /opt/teleo-eval/logs \
    /opt/teleo-eval/transcripts \
    /opt/teleo-eval/telegram-archives; do
    find "$tree" -not -user teleo -exec chown -h teleo:teleo {} + 2>/dev/null
done
chown teleo:teleo /opt/teleo-eval/workspaces/.main-worktree.lock 2>/dev/null || true
|
||||
282
deploy/sync-mirror.sh
Executable file
282
deploy/sync-mirror.sh
Executable file
|
|
@ -0,0 +1,282 @@
|
|||
#!/bin/bash
|
||||
# Bidirectional sync: Forgejo (authoritative) <-> GitHub (public mirror)
|
||||
# Forgejo wins on conflict. Runs every 2 minutes via cron.
|
||||
#
|
||||
# Security note: GitHub->Forgejo path is for external contributor convenience.
|
||||
# Never auto-process branches arriving via this path without a PR.
|
||||
# Eval pipeline and extract cron only act on PRs, not raw branches.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
REPO_DIR="/opt/teleo-eval/mirror/teleo-codex.git"
|
||||
LOG="/opt/teleo-eval/logs/sync.log"
|
||||
LOCKFILE="/tmp/sync-mirror.lock"
|
||||
PIPELINE_DB="/opt/teleo-eval/pipeline/pipeline.db"
|
||||
GITHUB_PAT_FILE="/opt/teleo-eval/secrets/github-pat"
|
||||
GITHUB_REPO="living-ip/teleo-codex"
|
||||
|
||||
log() { echo "[$(date -Iseconds)] $1" >> "$LOG"; }
|
||||
|
||||
# Lockfile — prevent concurrent runs
|
||||
if [ -f "$LOCKFILE" ]; then
|
||||
pid=$(cat "$LOCKFILE" 2>/dev/null)
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
rm -f "$LOCKFILE"
|
||||
fi
|
||||
echo $$ > "$LOCKFILE"
|
||||
trap 'rm -f "$LOCKFILE"' EXIT
|
||||
|
||||
# Pre-flight: fix permissions if another user touched the mirror dir (Rhea)
|
||||
BAD_PERMS=$(find "$REPO_DIR" ! -user teleo 2>/dev/null | head -1 || true)
|
||||
if [ -n "$BAD_PERMS" ]; then
|
||||
log "Fixing mirror permissions (found: $BAD_PERMS)"
|
||||
chown -R teleo:teleo "$REPO_DIR" 2>/dev/null
|
||||
fi
|
||||
cd "$REPO_DIR" || { log "ERROR: cannot cd to $REPO_DIR"; exit 1; }
|
||||
|
||||
# Step 1: Fetch from Forgejo (must succeed — it's authoritative)
|
||||
log "Fetching from Forgejo..."
|
||||
if ! git fetch forgejo --prune >> "$LOG" 2>&1; then
|
||||
log "ERROR: Forgejo fetch failed — aborting"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Step 2: Fetch from GitHub (warn on failure, don't abort)
|
||||
log "Fetching from GitHub..."
|
||||
git fetch origin --prune >> "$LOG" 2>&1 || log "WARN: GitHub fetch failed"
|
||||
|
||||
# Step 2.1: Fetch GitHub fork PR refs
|
||||
# Fork-based PRs don't create branches on origin — they create refs/pull/N/head
|
||||
# Fetch these so we can push them to Forgejo for evaluation
|
||||
GITHUB_PAT_STEP2=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
if [ -n "$GITHUB_PAT_STEP2" ]; then
|
||||
OPEN_PRS=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?state=open&per_page=100" \
|
||||
-H "Authorization: token $GITHUB_PAT_STEP2" 2>/dev/null || echo "[]")
|
||||
echo "$OPEN_PRS" | python3 -c "
|
||||
import sys, json
|
||||
prs = json.load(sys.stdin)
|
||||
for pr in prs:
|
||||
head = pr.get('head', {})
|
||||
# Only process fork PRs (repo differs from base repo)
|
||||
base_repo = pr.get('base', {}).get('repo', {}).get('full_name', '')
|
||||
head_repo = head.get('repo', {}) or {}
|
||||
head_full = head_repo.get('full_name', '')
|
||||
if head_full and head_full != base_repo:
|
||||
print(f\"{pr['number']} {head.get('ref', '')} {head.get('sha', '')}\")
|
||||
" 2>/dev/null | while read pr_num branch_name head_sha; do
|
||||
if [ -z "$pr_num" ] || [ -z "$branch_name" ]; then continue; fi
|
||||
PR_BRANCH="gh-pr-${pr_num}/${branch_name}"
|
||||
# Check if we already have this ref at the right SHA
|
||||
EXISTING=$(git rev-parse "refs/heads/$PR_BRANCH" 2>/dev/null || true)
|
||||
if [ "$EXISTING" = "$head_sha" ]; then continue; fi
|
||||
# Fetch the PR ref and create a local branch
|
||||
git fetch origin "refs/pull/${pr_num}/head:refs/heads/$PR_BRANCH" >> "$LOG" 2>&1 && \
|
||||
log "Fetched fork PR #$pr_num -> $PR_BRANCH" || \
|
||||
log "WARN: Failed to fetch fork PR #$pr_num"
|
||||
done
|
||||
fi
|
||||
|
||||
# Step 2.5: GitHub main -> Forgejo main (ff-only)
|
||||
# If a PR was merged on GitHub, GitHub main is ahead of Forgejo main.
|
||||
# Fast-forward Forgejo main to match — safe because ff-only guarantees no divergence.
|
||||
GITHUB_MAIN_FF=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
|
||||
FORGEJO_MAIN_FF=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)
|
||||
if [ -n "$GITHUB_MAIN_FF" ] && [ -n "$FORGEJO_MAIN_FF" ]; then
|
||||
if [ "$GITHUB_MAIN_FF" != "$FORGEJO_MAIN_FF" ]; then
|
||||
if git merge-base --is-ancestor "$FORGEJO_MAIN_FF" "$GITHUB_MAIN_FF"; then
|
||||
log "GitHub main ($GITHUB_MAIN_FF) ahead of Forgejo main ($FORGEJO_MAIN_FF) — fast-forwarding"
|
||||
git push forgejo "refs/remotes/origin/main:refs/heads/main" >> "$LOG" 2>&1 && \
|
||||
log "Forgejo main fast-forwarded to $GITHUB_MAIN_FF" || \
|
||||
log "WARN: Failed to fast-forward Forgejo main"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Step 3: Forgejo -> GitHub (primary direction)
|
||||
# Update local refs from Forgejo remote refs using process substitution (avoids subshell)
|
||||
log "Syncing Forgejo -> GitHub..."
|
||||
while read branch; do
|
||||
[ "$branch" = "HEAD" ] && continue
|
||||
git update-ref "refs/heads/$branch" "refs/remotes/forgejo/$branch" 2>/dev/null || \
|
||||
log "WARN: Failed to update ref $branch"
|
||||
done < <(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/)
|
||||
|
||||
# Safety: verify Forgejo main descends from GitHub main before force-pushing
|
||||
GITHUB_MAIN=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
|
||||
FORGEJO_MAIN=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)
|
||||
PUSH_MAIN=true
|
||||
if [ -n "$GITHUB_MAIN" ] && [ -n "$FORGEJO_MAIN" ]; then
|
||||
if ! git merge-base --is-ancestor "$GITHUB_MAIN" "$FORGEJO_MAIN"; then
|
||||
log "CRITICAL: Forgejo main is NOT a descendant of GitHub main — skipping main push"
|
||||
log "CRITICAL: GitHub main: $GITHUB_MAIN, Forgejo main: $FORGEJO_MAIN"
|
||||
PUSH_MAIN=false
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$PUSH_MAIN" = true ]; then
|
||||
git push origin --all --force >> "$LOG" 2>&1 || log "WARN: Push to GitHub failed"
|
||||
else
|
||||
# Push all branches except main
|
||||
while read branch; do
|
||||
[ "$branch" = "main" ] && continue
|
||||
[ "$branch" = "HEAD" ] && continue
|
||||
git push origin --force "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || \
|
||||
log "WARN: Failed to push $branch to GitHub"
|
||||
done < <(git for-each-ref --format="%(refname:lstrip=2)" refs/heads/)
|
||||
fi
|
||||
git push origin --tags --force >> "$LOG" 2>&1 || log "WARN: Tag push to GitHub failed"
|
||||
|
||||
# Step 4: GitHub -> Forgejo (external contributions only)
|
||||
# Only push branches that exist on GitHub but NOT on Forgejo
|
||||
log "Checking GitHub-only branches..."
|
||||
GITHUB_ONLY=$(comm -23 \
|
||||
<(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/origin/ | grep -v HEAD | sort) \
|
||||
<(git for-each-ref --format="%(refname:lstrip=3)" refs/remotes/forgejo/ | grep -v HEAD | sort))
|
||||
|
||||
if [ -n "$GITHUB_ONLY" ]; then
|
||||
FORGEJO_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token 2>/dev/null)
|
||||
for branch in $GITHUB_ONLY; do
|
||||
log "New from GitHub: $branch -> Forgejo"
|
||||
# Fork PR branches live as local refs (from Step 2.1), not on origin remote
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
git push forgejo "refs/heads/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
|
||||
log "WARN: Failed to push fork PR branch $branch to Forgejo"
|
||||
continue
|
||||
}
|
||||
else
|
||||
git push forgejo "refs/remotes/origin/$branch:refs/heads/$branch" >> "$LOG" 2>&1 || {
|
||||
log "WARN: Failed to push $branch to Forgejo"
|
||||
continue
|
||||
}
|
||||
fi
|
||||
# Auto-create PR on Forgejo for mirrored branches (external contributor path)
|
||||
# Skip pipeline-internal branches
|
||||
case "$branch" in
|
||||
extract/*|ingestion/*) continue ;;
|
||||
esac
|
||||
if [ -n "$FORGEJO_TOKEN" ]; then
|
||||
# Check if PR already exists for this branch (open or closed)
|
||||
# NOTE: Forgejo ?head= filter is broken (ignores head value, returns all PRs).
|
||||
# Workaround: fetch open+closed PRs, pipe to Python, check head.ref.
|
||||
HAS_PR=$( {
|
||||
curl -sf "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls?state=open&limit=50" \
|
||||
-H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
|
||||
echo ""
|
||||
curl -sf "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls?state=closed&sort=created&limit=50" \
|
||||
-H "Authorization: token $FORGEJO_TOKEN" 2>/dev/null || echo "[]"
|
||||
} | python3 -c "
|
||||
import sys, json
|
||||
branch = sys.argv[1]
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
if not line or line == '[]': continue
|
||||
try:
|
||||
for pr in json.loads(line):
|
||||
if pr.get('head', {}).get('ref') == branch:
|
||||
print('yes'); sys.exit(0)
|
||||
except: pass
|
||||
print('no')
|
||||
" "$branch" 2>/dev/null || echo "no")
|
||||
if [ "$HAS_PR" = "no" ]; then
|
||||
# Build PR title — for fork PRs, use the GitHub PR title
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
FORK_GH_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
||||
GITHUB_PAT_T=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
PR_TITLE=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls/$FORK_GH_NUM" \
|
||||
-H "Authorization: token $GITHUB_PAT_T" 2>/dev/null | \
|
||||
python3 -c "import sys,json; print(json.load(sys.stdin).get('title',''))" 2>/dev/null || true)
|
||||
[ -z "$PR_TITLE" ] && PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
|
||||
else
|
||||
PR_TITLE=$(echo "$branch" | sed 's|/|: |;s/-/ /g')
|
||||
fi
|
||||
PAYLOAD=$(python3 -c "import sys,json; print(json.dumps({'title':sys.argv[1],'head':sys.argv[2],'base':'main'}))" "$PR_TITLE" "$branch")
|
||||
RESULT=$(curl -sf -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \
|
||||
-H "Authorization: token $FORGEJO_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PAYLOAD" 2>/dev/null || echo "")
|
||||
PR_NUM=$(echo "$RESULT" | grep -o '"number":[0-9]*' | head -1 | grep -o "[0-9]*" || true)
|
||||
if [ -n "$PR_NUM" ]; then
|
||||
log "Auto-created PR #$PR_NUM on Forgejo for $branch"
|
||||
# Step 4.5: Link GitHub PR to Forgejo PR in pipeline DB
|
||||
if [[ "$branch" == gh-pr-* ]]; then
|
||||
GH_PR_NUM=$(echo "$branch" | sed 's|gh-pr-\([0-9]*\)/.*|\1|')
|
||||
else
|
||||
GITHUB_PAT=$(cat "$GITHUB_PAT_FILE" 2>/dev/null | tr -d '[:space:]')
|
||||
GH_PR_NUM=""
|
||||
if [ -n "$GITHUB_PAT" ]; then
|
||||
GH_PR_NUM=$(curl -sf "https://api.github.com/repos/$GITHUB_REPO/pulls?head=living-ip:$branch&state=all" \
|
||||
-H "Authorization: token $GITHUB_PAT" 2>/dev/null | \
|
||||
python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
|
||||
fi
|
||||
fi
|
||||
if [[ "$GH_PR_NUM" =~ ^[0-9]+$ ]] && [[ "$PR_NUM" =~ ^[0-9]+$ ]]; then
|
||||
sqlite3 "$PIPELINE_DB" "UPDATE prs SET github_pr = $GH_PR_NUM WHERE number = $PR_NUM;" 2>/dev/null && \
|
||||
log "Linked GitHub PR #$GH_PR_NUM -> Forgejo PR #$PR_NUM" || \
|
||||
log "WARN: Failed to link GitHub PR #$GH_PR_NUM to Forgejo PR #$PR_NUM in DB"
|
||||
fi
|
||||
else
|
||||
log "WARN: Failed to auto-create PR for $branch"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
done
|
||||
else
|
||||
log "No new GitHub-only branches"
|
||||
fi
|
||||
|
||||
# Step 6: Divergence alerting
|
||||
# After all sync steps, check if GitHub and Forgejo main still differ.
|
||||
# 2 consecutive divergent cycles (4 min) triggers a one-shot Telegram alert.
|
||||
DIVERGENCE_FILE="/opt/teleo-eval/logs/.divergence-count"
|
||||
git fetch forgejo main --quiet 2>/dev/null || true
|
||||
git fetch origin main --quiet 2>/dev/null || true
|
||||
GH_MAIN_FINAL=$(git rev-parse refs/remotes/origin/main 2>/dev/null || true)
|
||||
FG_MAIN_FINAL=$(git rev-parse refs/remotes/forgejo/main 2>/dev/null || true)
|
||||
|
||||
if [ -n "$GH_MAIN_FINAL" ] && [ -n "$FG_MAIN_FINAL" ] && [ "$GH_MAIN_FINAL" != "$FG_MAIN_FINAL" ]; then
|
||||
PREV=$(cat "$DIVERGENCE_FILE" 2>/dev/null || echo "0")
|
||||
if [ "$PREV" = "alerted" ]; then
|
||||
log "DIVERGENCE: still diverged (already alerted)"
|
||||
else
|
||||
COUNT=$((PREV + 1))
|
||||
echo "$COUNT" > "$DIVERGENCE_FILE"
|
||||
log "DIVERGENCE: cycle $COUNT — GitHub=$GH_MAIN_FINAL Forgejo=$FG_MAIN_FINAL"
|
||||
if [ "$COUNT" -ge 2 ]; then
|
||||
BOT_TOKEN=$(cat /opt/teleo-eval/secrets/telegram-bot-token 2>/dev/null || true)
|
||||
ADMIN_CHAT=$(cat /opt/teleo-eval/secrets/admin-chat-id 2>/dev/null || true)
|
||||
if [ -n "$BOT_TOKEN" ] && [ -n "$ADMIN_CHAT" ]; then
|
||||
ALERT_MSG=$(python3 -c "
|
||||
import json, sys
|
||||
msg = '⚠️ Mirror divergence detected\\n\\n'
|
||||
msg += f'GitHub main: {sys.argv[1][:8]}\\n'
|
||||
msg += f'Forgejo main: {sys.argv[2][:8]}\\n'
|
||||
msg += f'Diverged for {sys.argv[3]} consecutive cycles ({int(sys.argv[3])*2} min)\\n\\n'
|
||||
msg += 'Check sync-mirror.sh logs: /opt/teleo-eval/logs/sync.log'
|
||||
print(json.dumps({'chat_id': sys.argv[4], 'text': msg, 'parse_mode': 'HTML'}))
|
||||
" "$GH_MAIN_FINAL" "$FG_MAIN_FINAL" "$COUNT" "$ADMIN_CHAT")
|
||||
if curl -sf -X POST "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$ALERT_MSG" >> "$LOG" 2>&1; then
|
||||
log "DIVERGENCE: alert sent to admin"
|
||||
echo "alerted" > "$DIVERGENCE_FILE"
|
||||
else
|
||||
log "WARN: Failed to send divergence alert (will retry next cycle)"
|
||||
fi
|
||||
else
|
||||
log "WARN: Cannot send divergence alert — missing bot token or admin chat ID"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
else
|
||||
if [ -f "$DIVERGENCE_FILE" ]; then
|
||||
PREV=$(cat "$DIVERGENCE_FILE" 2>/dev/null || echo "0")
|
||||
if [ "$PREV" != "0" ]; then
|
||||
log "DIVERGENCE: resolved — repos back in sync"
|
||||
fi
|
||||
rm -f "$DIVERGENCE_FILE"
|
||||
fi
|
||||
fi
|
||||
|
||||
log "Sync complete"
|
||||
329
diagnostics/activity_endpoint.py
Normal file
329
diagnostics/activity_endpoint.py
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
"""
|
||||
/api/activity endpoint for diagnostics service.
|
||||
|
||||
Serves per-operation events for the dashboard v2 timeline hero panel.
|
||||
Derives events from the prs table (per-PR granularity) and audit_log
|
||||
(pipeline-level ops). Cursor-based pagination via timestamp.
|
||||
|
||||
Integration: add route and handler to app.py:
|
||||
app.router.add_get('/api/activity', handle_activity)
|
||||
|
||||
Contract (endpoint #7):
|
||||
GET /api/activity?limit=100&cursor=<ISO-timestamp>&type=<op>[,<op>...]
|
||||
Response: {
|
||||
events: [{timestamp, agent, operation, target, domain, description, status, pr_number, source_channel}],
|
||||
limit: int,
|
||||
cursor: string|null,
|
||||
has_more: bool
|
||||
}
|
||||
|
||||
Data sources:
|
||||
- prs table: number, status, domain, agent, created_at, merged_at, branch, source_path, source_channel, commit_type, description
|
||||
- audit_log table: timestamp, stage, event, detail
|
||||
- contributors table: handle, display_name (intended for agent name resolution; not queried by this module — the agent comes from prs.agent or the branch prefix)
|
||||
"""
|
||||
|
||||
from aiohttp import web
|
||||
import sqlite3
|
||||
import json
|
||||
|
||||
|
||||
# Timeline operation for every PR status other than 'merged'. These map
# straight from status to operation; no semantic classification is applied.
NON_MERGED_STATUS_TO_OPERATION = {
    # about to become knowledge
    'approved': 'new',
    # cyan — extraction still in flight (opened, being validated, under review)
    'open': 'extract',
    'validating': 'extract',
    'reviewing': 'extract',
    # green — merge in progress
    'merging': 'new',
    # grey — closed/rejected, or stale
    'closed': 'infra',
    'zombie': 'infra',
    # red-orange — conflict detected
    'conflict': 'challenge',
}

# commit_type values that land on main without representing new knowledge.
_MAINTENANCE_COMMIT_TYPES = {'reweave', 'pipeline', 'fix'}
|
||||
|
||||
|
||||
def classify_pr_operation(status, commit_type, branch, description=None):
    """Map a PR row onto a Timeline operation name.

    PRs whose status is not 'merged' are looked up in
    NON_MERGED_STATUS_TO_OPERATION, with 'infra' for unknown statuses.

    Merged PRs take the first rule that matches:
      1. 'challenge' — commit_type is 'challenge', the branch starts with
         'challenge/', or the description mentions 'challenged_by'.
      2. 'enrich'    — commit_type is 'enrich', or the branch starts with
         'enrich/' or 'reweave/'.
      3. 'infra'     — commit_type is one of _MAINTENANCE_COMMIT_TYPES.
      4. 'new'       — everything else ('knowledge', 'extract', 'research',
         'entity', unknown or missing commit_type).

    commit_type and description are compared case-insensitively; any of
    commit_type, branch and description may be None.
    """
    kind = (commit_type or '').lower()
    ref = branch or ''
    text = (description or '').lower()

    if status != 'merged':
        return NON_MERGED_STATUS_TO_OPERATION.get(status, 'infra')

    # Challenge is checked first: it is the most specific signal.
    is_challenge = (
        kind == 'challenge'
        or ref.startswith('challenge/')
        or 'challenged_by' in text
    )
    if is_challenge:
        return 'challenge'

    if kind == 'enrich' or ref.startswith(('enrich/', 'reweave/')):
        return 'enrich'

    # Maintenance commits are infrastructure; anything left is new knowledge.
    return 'infra' if kind in _MAINTENANCE_COMMIT_TYPES else 'new'
|
||||
|
||||
# Timeline operation for each audit_log stage; stages not listed here are
# treated as 'infra' by the handler.
STAGE_TO_OPERATION = {
    # intake stages
    'ingest': 'extract',
    'extract': 'extract',
    # checking stages
    'validate': 'infra',
    'evaluate': 'infra',
    # outcomes
    'merge': 'new',
    'reject': 'infra',
    'breaker': 'challenge',
}
|
||||
|
||||
|
||||
def pr_description(row):
    """Build a one-line, human-readable description for a PR row.

    Args:
        row: mapping with 'status', 'domain' and 'branch' keys. 'number' is
            only read when the status has no dedicated label.

    Returns:
        "<Label>[ [domain]]: <target>", where the label is derived from the
        status (or "PR #<number>" for an unrecognised status), the domain tag
        is omitted for missing/'unknown'/'general' domains, and the target is
        the last path segment of the branch name.
    """
    status = row['status']
    domain = row['domain'] or 'unknown'
    branch = row['branch'] or ''

    # Branch format is typically agent-name/claims-description; the last
    # segment is the most meaningful part to show.
    target = branch.rsplit('/', 1)[-1]

    # Only tag domains that carry information.
    domain_tag = '' if domain in ('unknown', 'general') else f" [{domain}]"

    status_labels = {
        'merged': 'Merged',
        'approved': 'Approved',
        'open': 'Opened',
        'validating': 'Validating',
        'reviewing': 'Reviewing',
        'merging': 'Merging',
        'closed': 'Closed',
        'zombie': 'Stale',
        'conflict': 'Conflict',
    }
    label = status_labels.get(status)
    if label is None:
        # Unknown status: fall back to the PR number. Looked up lazily so
        # rows with a known status do not need a 'number' key.
        label = f"PR #{row['number']}"

    return f"{label}{domain_tag}: {target}"
|
||||
|
||||
|
||||
def audit_description(row):
    """Build a short (at most 150 characters) description of an audit_log row.

    Preference order:
      1. If 'detail' is a JSON object with a non-empty 'message' (or, failing
         that, 'reason'), return "[stage] <message>".
      2. Otherwise, if 'event' is set, return "[stage] <event>", followed by
         " — <detail>" when the raw detail is shorter than 80 characters.
      3. Otherwise return "[stage] pipeline event".

    Args:
        row: mapping with 'stage', 'event' and 'detail' keys (values may be
            None).
    """
    stage = row['stage'] or ''
    event = row['event'] or ''
    detail = row['detail'] or ''

    # A structured detail payload takes precedence over the bare event name.
    message = ''
    if detail:
        try:
            payload = json.loads(detail)
        except (json.JSONDecodeError, TypeError):
            payload = None
        if isinstance(payload, dict):
            message = payload.get('message') or payload.get('reason', '')
    if message:
        return f"[{stage}] {message}"[:150]

    if not event:
        return f"[{stage}] pipeline event"

    # Append the raw detail only when it is short enough to stay readable.
    suffix = f" — {detail}" if detail and len(detail) < 80 else ''
    return f"[{stage}] {event}{suffix}"[:150]
|
||||
|
||||
|
||||
async def handle_activity(request):
    """Handler for GET /api/activity.

    Query params:
        limit (int, default 100, clamped to 1..500): number of events to
            return. Unparseable values fall back to 100.
        cursor (ISO timestamp): return events older than this timestamp.
        type (str, optional): comma-separated operation types to include
            (extract|new|enrich|challenge|infra). If absent, all types are
            returned.

    Events come from two sources:
        1. prs table — one event per PR, timestamped at merged_at when set,
           otherwise created_at.
        2. audit_log — pipeline-level events, queried only when the PR
           events alone do not fill the requested limit.

    The combined events are sorted by timestamp descending and truncated to
    ``limit``. The response is JSON with ``events``, ``limit``, ``cursor``
    (timestamp of the last returned event, or null) and ``has_more``. A
    database error yields a 500 response with an ``error`` message.
    """
    try:
        limit = int(request.query.get('limit', 100))
    except (ValueError, TypeError):
        limit = 100
    # Clamp on both sides: SQLite treats a negative LIMIT as "no limit" and a
    # negative slice bound would silently drop events from the end.
    limit = max(1, min(limit, 500))

    cursor = request.query.get('cursor')
    type_param = request.query.get('type', '').strip()
    allowed_ops = {t.strip() for t in type_param.split(',') if t.strip()} or None

    db_path = request.app['db_path']

    if cursor:
        pr_where = "COALESCE(merged_at, created_at) < ?"
        audit_where = "timestamp < ?"
        cursor_args = (cursor,)
    else:
        pr_where = audit_where = "1=1"
        cursor_args = ()

    events = []
    conn = None
    try:
        # Read-only URI connection: this endpoint never writes.
        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        conn.row_factory = sqlite3.Row

        # Source 1: PR events (primary — these have the granularity we need).
        pr_query = """
            SELECT number, status, domain, agent, branch, source_path,
                   created_at, merged_at, source_channel, commit_type,
                   description
            FROM prs
            WHERE {where_clause}
            ORDER BY COALESCE(merged_at, created_at) DESC
            LIMIT ?
        """

        # Over-fetch when filtering by type so enough rows survive the
        # post-build filter; capped at 2000 to avoid runaway queries.
        # Unfiltered, one extra row is enough to detect "has more".
        fetch_limit = min(2000, limit * 5) if allowed_ops else limit + 1

        rows = conn.execute(
            pr_query.format(where_clause=pr_where),
            (*cursor_args, fetch_limit),
        ).fetchall()

        # Known knowledge agents for branch-prefix inference.
        knowledge_agents = {'rio', 'clay', 'theseus', 'vida', 'astra', 'leo'}

        for row in rows:
            row_dict = dict(row)
            operation = classify_pr_operation(
                row_dict['status'],
                row_dict.get('commit_type'),
                row_dict.get('branch'),
                row_dict.get('description'),
            )
            if allowed_ops and operation not in allowed_ops:
                continue
            description = pr_description(row_dict)

            # merged_at is the more interesting event when present.
            timestamp = row_dict['merged_at'] or row_dict['created_at']

            # Infer the agent from the branch prefix (agent-name/...) when
            # the DB column is empty.
            agent = row_dict['agent']
            if not agent and row_dict.get('branch'):
                prefix = row_dict['branch'].split('/')[0].lower()
                if prefix in knowledge_agents:
                    agent = prefix

            branch = row_dict['branch']
            events.append({
                'timestamp': timestamp,
                'agent': agent,
                'operation': operation,
                'target': branch.split('/')[-1] if branch else None,
                'domain': row_dict['domain'],
                'description': description,
                'status': row_dict['status'],
                'pr_number': row_dict['number'],
                'source_channel': row_dict.get('source_channel') or 'unknown',
            })

        # Source 2: audit log events (secondary — pipeline-level). Only
        # consulted when the PR events alone did not reach the limit.
        # NOTE(review): when PR rows fill a page, audit events newer than the
        # resulting cursor are never surfaced on later pages — confirm this
        # PR-first behaviour is intended.
        if len(events) < limit:
            remaining = limit - len(events) + 1
            audit_query = """
                SELECT timestamp, stage, event, detail
                FROM audit_log
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ?
            """
            audit_rows = conn.execute(
                audit_query.format(where_clause=audit_where),
                (*cursor_args, remaining),
            ).fetchall()

            for row in audit_rows:
                row_dict = dict(row)
                operation = STAGE_TO_OPERATION.get(row_dict['stage'], 'infra')
                if allowed_ops and operation not in allowed_ops:
                    continue
                events.append({
                    'timestamp': row_dict['timestamp'],
                    'agent': None,  # audit_log has no agent column
                    'operation': operation,
                    'target': None,
                    'domain': None,
                    'description': audit_description(row_dict),
                    'status': None,
                    'pr_number': None,
                    'source_channel': None,  # audit events not tied to a PR
                })
    except sqlite3.Error as e:
        return web.json_response({'error': f'Database error: {e}'}, status=500)
    finally:
        # Close on every path, including query failures.
        if conn is not None:
            conn.close()

    # Sort all events by timestamp descending (missing timestamps sort last).
    events.sort(key=lambda e: e['timestamp'] or '', reverse=True)

    # Apply limit and check for more.
    has_more = len(events) > limit
    events = events[:limit]

    # Cursor is the timestamp of the last event returned.
    next_cursor = events[-1]['timestamp'] if events else None

    return web.json_response({
        'events': events,
        'limit': limit,
        'cursor': next_cursor,
        'has_more': has_more,
    })
|
||||
|
||||
|
||||
# --- Integration snippet for app.py ---
|
||||
# Add to your route setup:
|
||||
#
|
||||
# from activity_endpoint import handle_activity
|
||||
# app.router.add_get('/api/activity', handle_activity)
|
||||
#
|
||||
# Requires: app['db_path'] set to the pipeline.db path
|
||||
# e.g.: app['db_path'] = '/opt/teleo-eval/pipeline/pipeline.db'
|
||||
288
diagnostics/activity_feed_api.py
Normal file
288
diagnostics/activity_feed_api.py
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
"""Activity feed API — serves contribution events from pipeline.db."""
|
||||
import re
|
||||
import sqlite3
|
||||
import math
|
||||
import time
|
||||
from aiohttp import web
|
||||
|
||||
# Pipeline database this module reads from.
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"

# Seconds a built event list is reused — 1 minute, so activity feels fresh.
CACHE_TTL = 60
# Module-level cache slot: "data" holds the last build result and "ts" the
# time it was built.
_cache = {"data": None, "ts": 0}

# commit_types surfaced in the activity feed. `pipeline` (system maintenance:
# reweave/fix auto-runs, zombie cleanup) is deliberately absent and stays
# hidden.
_FEED_COMMIT_TYPES = (
    "knowledge",
    "enrich",
    "challenge",
    "research",
    "entity",
    "extract",
    "reweave",
)

# Inbox archive filenames look like YYYY-MM-DD-publisher-topic-HASH4; they
# are source-archive slugs, not claim slugs. Matching this shape is a
# fallback signal for when the branch/description heuristics miss (e.g. a
# populated description that is really a source title, not a claim insight).
_SOURCE_SLUG_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}-.+-[a-f0-9]{4}$")
|
||||
|
||||
|
||||
def _get_conn():
    """Open a connection to DB_PATH that returns sqlite3.Row rows.

    The connection is configured with `PRAGMA busy_timeout = 10000`. The
    caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.execute("PRAGMA busy_timeout = 10000")
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def _is_source_slug(slug):
    """Return True when *slug* is non-empty and matches _SOURCE_SLUG_PATTERN."""
    if not slug:
        return False
    return _SOURCE_SLUG_PATTERN.match(slug) is not None
|
||||
|
||||
|
||||
def _classify_event(branch, description, commit_type, candidate_slug=None):
    """Classify a merged PR as 'create', 'enrich', 'challenge', 'source' or None.

    None means the commit_type is not one of _FEED_COMMIT_TYPES and the PR
    is left out of the feed. Otherwise the first matching rule wins:

      * challenge — commit_type 'challenge', a 'challenge/' branch, or a
        description mentioning 'challenged_by'.
      * enrich    — commit_type 'enrich', or an 'enrich/' / 'reweave/' branch.
      * source    — the PR archived a source without producing a claim,
        detected by either of two signals (defense in depth):
          1. an 'extract/' branch with an empty description, or
          2. candidate_slug shaped like an inbox archive filename
             (YYYY-MM-DD-...-HASH4), which also catches descriptions that
             are just a source title.
      * create    — everything else.
    """
    kind = (commit_type or "").lower()
    ref = branch or ""
    text = (description or "").lower()
    has_claim_text = bool(description and description.strip())

    if kind not in _FEED_COMMIT_TYPES:
        return None

    # Explicit challenge signals are checked before anything else.
    if kind == "challenge" or ref.startswith("challenge/") or "challenged_by" in text:
        return "challenge"

    # Enrichment covers reweave edge-connects as well as enrich branches.
    if kind == "enrich" or ref.startswith(("enrich/", "reweave/")):
        return "enrich"

    # The slug check only runs when the branch/description signal missed.
    archive_only = ref.startswith("extract/") and not has_claim_text
    if archive_only or _is_source_slug(candidate_slug):
        return "source"

    return "create"
|
||||
|
||||
|
||||
def _normalize_contributor(submitted_by, agent):
    """Pick the display name credited for an event.

    Args:
        submitted_by: submitter handle, optionally prefixed with '@' and
            padded with whitespace; may be None or empty.
        agent: agent name used when there is no usable submitter; may be
            None or empty.

    Returns:
        The submitter handle without leading '@' or surrounding whitespace
        when one is present, otherwise the stripped agent name, otherwise
        the literal "pipeline".
    """
    if submitted_by:
        # Strip again after removing '@' so "@ name" does not keep a leading
        # space, and so a bare "@" falls through instead of returning "".
        handle = submitted_by.strip().lstrip("@").strip()
        if handle:
            return handle
    if agent:
        name = agent.strip()
        if name and name != "pipeline":
            return name
    return "pipeline"
|
||||
|
||||
|
||||
def _summary_from_branch(branch):
    """Turn the part of a branch name after the first '/' into a readable phrase.

    A leading run of digits and dashes (date prefix) and a trailing
    "-xxxx" four-character hex suffix are removed, remaining dashes become
    spaces, and the result is capitalised. Returns "" when *branch* is
    empty or contains no '/'.
    """
    if not branch:
        return ""
    _prefix, separator, slug = branch.partition("/")
    if not separator:
        return ""
    without_date = re.sub(r"^[\d-]+-", "", slug)  # strip date prefix
    without_hash = re.sub(r"-[a-f0-9]{4}$", "", without_date)  # strip hash suffix
    return without_hash.replace("-", " ").strip().capitalize()
|
||||
|
||||
|
||||
def _extract_claim_slugs(description, branch=None):
    """Derive claim slugs from a '|'-separated list of claim titles.

    Each title is lower-cased, stripped of everything except alphanumerics,
    spaces and dashes, and has its spaces replaced by dashes. Slugs of 10
    characters or fewer are discarded; the rest are cut to 120 characters.

    With no description, the part of *branch* after the first '/' (cut to
    120 characters) is returned as the only slug, or an empty list when
    there is no such part.
    """
    if not description:
        if branch and "/" in branch:
            return [branch.split("/", 1)[1][:120]]
        return []

    slugs = []
    for raw_title in description.split("|"):
        title = raw_title.strip()
        if not title:
            continue
        kept = "".join(
            ch for ch in title.lower().strip()
            if ch.isalnum() or ch in (" ", "-")
        )
        slug = kept.replace(" ", "-").strip("-")
        if len(slug) > 10:
            slugs.append(slug[:120])
    return slugs
|
||||
|
||||
|
||||
def _hot_score(challenge_count, enrich_count, signal_count, hours_since):
    """Return an age-decayed activity score.

    Challenges weigh 3, enrichments 2 and other signals 1; the weighted sum
    is divided by the age in hours (floored at 0.5) raised to the power 1.5.
    """
    weighted_activity = 3 * challenge_count + 2 * enrich_count + signal_count
    age_hours = max(hours_since, 0.5)
    return weighted_activity / age_hours ** 1.5
|
||||
|
||||
|
||||
def _build_events():
    """Build the activity feed from merged PRs.

    Reads up to 2000 merged PRs, newest first, whose commit_type is in
    _FEED_COMMIT_TYPES, and turns each into one feed event.

    Returns:
        (events, claim_activity). ``events`` is a list of event dicts in
        query order. ``claim_activity`` maps each claim slug of a non-source
        PR to its 'challenges', 'enriches' and 'signals' counters plus
        'first_seen' (the merged_at of the first row encountered for that
        slug).
    """
    ci_by_type = {"create": 0.35, "enrich": 0.25, "challenge": 0.40, "source": 0.15}
    counter_by_type = {"challenge": "challenges", "enrich": "enriches"}

    conn = _get_conn()
    try:
        placeholders = ",".join("?" for _ in _FEED_COMMIT_TYPES)
        rows = conn.execute(f"""
            SELECT p.number, p.branch, p.domain, p.agent, p.submitted_by,
                   p.merged_at, p.description, p.commit_type, p.cost_usd,
                   p.source_channel, p.source_path
            FROM prs p
            WHERE p.status = 'merged'
              AND p.commit_type IN ({placeholders})
              AND p.merged_at IS NOT NULL
            ORDER BY p.merged_at DESC
            LIMIT 2000
        """, _FEED_COMMIT_TYPES).fetchall()

        events = []
        claim_activity = {}  # slug -> {challenges, enriches, signals, first_seen}

        for row in rows:
            branch = row["branch"]
            description = row["description"]

            slugs = _extract_claim_slugs(description, branch)
            event_type = _classify_event(
                branch, description, row["commit_type"],
                candidate_slug=slugs[0] if slugs else "",
            )
            if not event_type:
                continue

            contributor = _normalize_contributor(row["submitted_by"], row["agent"])
            merged_at = row["merged_at"] or ""
            ci_earned = round(ci_by_type.get(event_type, 0), 2)
            domain = row["domain"] or "unknown"
            source_channel = row["source_channel"] or "unknown"

            if event_type == "source":
                # Source events never carry a claim_slug — no claim was
                # written — so the frontend can't produce a 404-ing claim
                # link.
                summary_text = _summary_from_branch(branch)
                events.append({
                    "type": "source",
                    "claim_slug": "",
                    "source_slug": summary_text.lower().replace(" ", "-") or branch,
                    "domain": domain,
                    "contributor": contributor,
                    "timestamp": merged_at,
                    "ci_earned": ci_earned,
                    "summary": summary_text,
                    "pr_number": row["number"],
                    "source_channel": source_channel,
                })
                continue

            # Tally this PR against every claim it touches.
            counter = counter_by_type.get(event_type, "signals")
            for slug in slugs:
                stats = claim_activity.setdefault(slug, {
                    "challenges": 0, "enriches": 0, "signals": 0,
                    "first_seen": merged_at,
                })
                stats[counter] += 1

            # Summary: first title of the description (capped at 120
            # characters), else a phrase derived from the branch name.
            if description:
                summary_text = description.split("|")[0].strip()
                if len(summary_text) > 120:
                    summary_text = summary_text[:117] + "..."
            elif branch:
                summary_text = _summary_from_branch(branch)
            else:
                summary_text = ""

            # One event per PR, linked to its first claim slug (if any).
            events.append({
                "type": event_type,
                "claim_slug": slugs[0] if slugs else "",
                "domain": domain,
                "contributor": contributor,
                "timestamp": merged_at,
                "ci_earned": ci_earned,
                "summary": summary_text,
                "pr_number": row["number"],
                "source_channel": source_channel,
            })

        return events, claim_activity
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _sort_events(events, claim_activity, sort_mode, now_ts):
    """Sort *events* in place according to *sort_mode* and return the list.

    Args:
        events: list of event dicts (mutated in place).
        claim_activity: mapping of claim slug to a dict with 'challenges',
            'enriches' and 'signals' counters; used by the "hot" mode.
        sort_mode: "recent" (timestamp descending), "hot" (_hot_score of
            the event's claim, descending) or "important" (challenge, then
            enrich, create, source, other; longer summaries first within a
            type). Any other value leaves the order unchanged.
        now_ts: current time as epoch seconds; used to age events in "hot"
            mode.
    """
    # Function-scope import: this module does not import datetime at the top.
    from datetime import datetime, timezone

    if sort_mode == "recent":
        events.sort(key=lambda e: e["timestamp"], reverse=True)
    elif sort_mode == "hot":
        no_activity = {"challenges": 0, "enriches": 0, "signals": 0}

        def hot_key(e):
            ca = claim_activity.get(e["claim_slug"], no_activity)
            try:
                evt_time = datetime.fromisoformat(e["timestamp"].replace("Z", "+00:00"))
                # Treat naive timestamps as UTC. Without this, .timestamp()
                # reads them as server-local time and skews every age by the
                # local UTC offset relative to the epoch-based now_ts.
                if evt_time.tzinfo is None:
                    evt_time = evt_time.replace(tzinfo=timezone.utc)
                hours = (now_ts - evt_time.timestamp()) / 3600
            except (ValueError, AttributeError):
                # Unparseable or missing timestamp: treat as very old.
                hours = 9999
            return _hot_score(ca["challenges"], ca["enriches"], ca["signals"], hours)

        events.sort(key=hot_key, reverse=True)
    elif sort_mode == "important":
        type_rank = {"challenge": 0, "enrich": 1, "create": 2, "source": 3}
        events.sort(key=lambda e: (type_rank.get(e["type"], 4), -len(e["summary"])))
    return events
|
||||
|
||||
|
||||
async def handle_activity_feed(request):
    """Handler for GET /api/activity-feed.

    Query params:
        sort: "recent" (default), "hot" or "important"; anything else is
            treated as "recent".
        domain, contributor: exact-match filters (optional).
        type: comma-separated event types to include (optional).
        limit: page size, default 20, clamped to 0..100.
        offset: number of events to skip, default 0, never negative.

    Events are built by _build_events() and cached in the module-level
    _cache for CACHE_TTL seconds. The JSON response carries the requested
    page in ``events``, the filtered ``total``, and the effective ``sort``,
    ``offset`` and ``limit``; it is sent with a permissive CORS header.
    """
    query = request.query

    sort_mode = query.get("sort", "recent")
    if sort_mode not in ("hot", "recent", "important"):
        sort_mode = "recent"
    domain = query.get("domain", "")
    contributor = query.get("contributor", "")
    type_param = query.get("type", "")
    type_filter = {t.strip() for t in type_param.split(",") if t.strip()} if type_param else None

    try:
        # Floor at 0 as offset already does: a negative limit would turn the
        # page slice's end index negative and return far more than 100 rows.
        limit = max(0, min(int(query.get("limit", "20")), 100))
    except ValueError:
        limit = 20
    try:
        offset = max(int(query.get("offset", "0")), 0)
    except ValueError:
        offset = 0

    # Rebuild the cached feed once it is older than CACHE_TTL.
    now = time.time()
    if _cache["data"] is None or (now - _cache["ts"]) > CACHE_TTL:
        _cache["data"] = _build_events()
        _cache["ts"] = now

    events, claim_activity = _cache["data"]

    filtered = events
    if domain:
        filtered = [e for e in filtered if e["domain"] == domain]
    if contributor:
        filtered = [e for e in filtered if e["contributor"] == contributor]
    if type_filter:
        filtered = [e for e in filtered if e["type"] in type_filter]

    # Sort a copy so the cached list keeps its original order.
    sorted_events = _sort_events(list(filtered), claim_activity, sort_mode, now)
    total = len(sorted_events)
    page = sorted_events[offset:offset + limit]

    return web.json_response({
        "events": page,
        "total": total,
        "sort": sort_mode,
        "offset": offset,
        "limit": limit,
    }, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
def register(app):
    """Attach handle_activity_feed to *app* as GET /api/activity-feed."""
    router = app.router
    router.add_get("/api/activity-feed", handle_activity_feed)
|
||||
539
diagnostics/alerting.py
Normal file
539
diagnostics/alerting.py
Normal file
|
|
@ -0,0 +1,539 @@
|
|||
"""Argus active monitoring — health watchdog, quality regression, throughput anomaly detection.
|
||||
|
||||
Provides check functions that detect problems and return structured alerts.
|
||||
Called by /check endpoint (periodic cron) or on-demand.
|
||||
|
||||
Alert schema:
|
||||
{
|
||||
"id": str, # unique key for dedup (e.g. "dormant:ganymede")
|
||||
"severity": str, # "critical" | "warning" | "info"
|
||||
"category": str, # "health" | "quality" | "throughput" | "failure_pattern"
|
||||
"title": str, # human-readable headline
|
||||
"detail": str, # actionable description
|
||||
"agent": str|None, # affected agent (if applicable)
|
||||
"domain": str|None, # affected domain (if applicable)
|
||||
"detected_at": str, # ISO timestamp
|
||||
"auto_resolve": bool, # clears when condition clears
|
||||
}
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import statistics
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
# ─── Agent-domain mapping (static config, maintained by Argus) ──────────────
|
||||
|
||||
AGENT_DOMAINS = {
    # Agents tied to specific domains
    "rio": ["internet-finance"],
    "clay": ["creative-industries"],
    # Agents with no domain list (None), with their role
    "ganymede": None,    # reviewer — cross-domain
    "epimetheus": None,  # infra
    "leo": None,         # standards
    "oberon": None,      # evolution tracking
    "vida": None,        # health monitoring
    "hermes": None,      # comms
    "astra": None,       # research
}
|
||||
|
||||
# Thresholds

# Hours without PR activity before an agent is reported as dormant.
DORMANCY_HOURS = 48

# Percentage points below the 7-day baseline.
APPROVAL_DROP_THRESHOLD = 15

# Alert if today < 50% of the 7-day SMA.
THROUGHPUT_DROP_RATIO = 0.5

# A single reason accounting for > 20% of recent rejections.
REJECTION_SPIKE_RATIO = 0.20

# Same agent + same rejection reason > N times in 6h.
STUCK_LOOP_THRESHOLD = 3

# Daily cost > 2x the 7-day average.
COST_SPIKE_RATIO = 2.0
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
|
||||
|
||||
|
||||
# ─── Check: Agent Health (dormancy detection) ───────────────────────────────
|
||||
|
||||
|
||||
def check_agent_health(conn: sqlite3.Connection) -> list[dict]:
    """Detect agents with no PR activity in the last DORMANCY_HOURS hours.

    Args:
        conn: connection to the pipeline database whose rows support access
            by column name (e.g. row_factory = sqlite3.Row).

    Returns:
        One "dormant:<agent>" warning alert (see the module-level alert
        schema) per agent whose most recent ``last_attempt`` is older than
        the threshold. Agents named "unknown", and agents whose latest
        timestamp is missing or cannot be parsed, are skipped.
    """
    alerts = []

    # Last activity per agent.
    rows = conn.execute(
        """SELECT agent, MAX(last_attempt) as latest, COUNT(*) as total_prs
           FROM prs WHERE agent IS NOT NULL
           GROUP BY agent"""
    ).fetchall()

    now = datetime.now(timezone.utc)
    for r in rows:
        agent = r["agent"]
        latest = r["latest"]
        if agent in ("unknown", None) or not latest:
            continue

        try:
            # Accept a trailing 'Z', which fromisoformat rejects before
            # Python 3.11.
            last_dt = datetime.fromisoformat(latest.replace("Z", "+00:00"))
        except (ValueError, AttributeError, TypeError):
            # One malformed timestamp must not abort the whole health check.
            continue
        # Naive timestamps are taken to be UTC.
        if last_dt.tzinfo is None:
            last_dt = last_dt.replace(tzinfo=timezone.utc)

        hours_since = (now - last_dt).total_seconds() / 3600
        if hours_since <= DORMANCY_HOURS:
            continue

        alerts.append({
            "id": f"dormant:{agent}",
            "severity": "warning",
            "category": "health",
            "title": f"Agent '{agent}' dormant for {int(hours_since)}h",
            "detail": (
                f"No PR activity since {latest}. "
                f"Last seen {int(hours_since)}h ago (threshold: {DORMANCY_HOURS}h). "
                f"Total historical PRs: {r['total_prs']}."
            ),
            "agent": agent,
            "domain": None,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        })

    return alerts
|
||||
|
||||
|
||||
# ─── Check: Quality Regression (approval rate drop) ─────────────────────────
|
||||
|
||||
|
||||
def check_quality_regression(conn: sqlite3.Connection) -> list[dict]:
    """Flag approval-rate drops: last 24 hours against the last 7 days.

    The overall evaluate-stage approval rate is computed for both windows from
    ``audit_log``. If the 24h rate is more than APPROVAL_DROP_THRESHOLD
    percentage points below the 7-day rate, a ``critical`` alert is added. The
    same comparison is then delegated to ``_check_approval_by_dimension`` for
    each agent and each domain.

    Returns:
        The list of alert dicts produced by all three comparisons.
    """
    found = []

    # Overall counts over the 7-day baseline window.
    week = conn.execute(
        """SELECT
               COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
               COUNT(*) as total
           FROM audit_log
           WHERE stage='evaluate'
             AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
             AND timestamp > datetime('now', '-7 days')"""
    ).fetchone()

    # Overall counts over the most recent 24 hours.
    day = conn.execute(
        """SELECT
               COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
               COUNT(*) as total
           FROM audit_log
           WHERE stage='evaluate'
             AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
             AND timestamp > datetime('now', '-24 hours')"""
    ).fetchone()

    # A window with no evaluations has no rate to compare.
    week_rate = None
    if week["total"]:
        week_rate = week["approved"] / week["total"] * 100
    day_rate = None
    if day["total"]:
        day_rate = day["approved"] / day["total"] * 100

    if week_rate is not None and day_rate is not None:
        delta = week_rate - day_rate
        if delta > APPROVAL_DROP_THRESHOLD:
            found.append({
                "id": "quality_regression:overall",
                "severity": "critical",
                "category": "quality",
                "title": f"Approval rate dropped {delta:.0f}pp (24h: {day_rate:.0f}% vs 7d: {week_rate:.0f}%)",
                "detail": (
                    f"24h approval rate ({day_rate:.1f}%) is {delta:.1f} percentage points below "
                    f"7-day baseline ({week_rate:.1f}%). "
                    f"Evaluated {day['total']} PRs in last 24h."
                ),
                "agent": None,
                "domain": None,
                "detected_at": _now_iso(),
                "auto_resolve": True,
            })

    # Per-agent approval rate (24h vs 7d) — only for agents with >=5 evals in each window
    # COALESCE: rejection events use $.agent, eval events use $.domain_agent (Epimetheus 2026-03-28)
    _check_approval_by_dimension(
        conn,
        found,
        "agent",
        "COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent'))",
    )

    # Per-domain approval rate (24h vs 7d) — Theseus addition
    _check_approval_by_dimension(conn, found, "domain", "json_extract(detail, '$.domain')")

    return found
|
||||
|
||||
|
||||
_ALLOWED_DIM_EXPRS = frozenset({
|
||||
"json_extract(detail, '$.agent')",
|
||||
"json_extract(detail, '$.domain')",
|
||||
"COALESCE(json_extract(detail, '$.agent'), json_extract(detail, '$.domain_agent'))",
|
||||
})
|
||||
|
||||
|
||||
def _check_approval_by_dimension(conn, alerts, dim_name, dim_expr):
    """Append approval-rate regression alerts grouped by one dimension.

    Args:
        conn: SQLite connection whose rows can be indexed by column name.
        alerts: List that receives the new alert dicts (mutated in place).
        dim_name: Label for the dimension; ``"agent"`` and ``"domain"`` also
            decide which alert field carries the value.
        dim_expr: SQL expression for the dimension value. It is interpolated
            into the query text, so it must be in _ALLOWED_DIM_EXPRS.

    Raises:
        ValueError: If ``dim_expr`` is not in _ALLOWED_DIM_EXPRS.
    """
    if dim_expr not in _ALLOWED_DIM_EXPRS:
        raise ValueError(f"untrusted dim_expr: {dim_expr}")

    # 7-day baseline: approval percentage for each value with at least 5 evals.
    weekly = conn.execute(
        f"""SELECT {dim_expr} as dim_val,
                   COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
                   COUNT(*) as total
            FROM audit_log
            WHERE stage='evaluate'
              AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
              AND timestamp > datetime('now', '-7 days')
              AND {dim_expr} IS NOT NULL
            GROUP BY dim_val HAVING total >= 5"""
    ).fetchall()
    week_rate_by_value = {}
    for row in weekly:
        week_rate_by_value[row["dim_val"]] = row["approved"] / row["total"] * 100

    # Last 24 hours, same grouping and the same minimum sample size.
    daily = conn.execute(
        f"""SELECT {dim_expr} as dim_val,
                   COUNT(CASE WHEN event='approved' THEN 1 END) as approved,
                   COUNT(*) as total
            FROM audit_log
            WHERE stage='evaluate'
              AND event IN ('approved','changes_requested','domain_rejected','tier05_rejected')
              AND timestamp > datetime('now', '-24 hours')
              AND {dim_expr} IS NOT NULL
            GROUP BY dim_val HAVING total >= 5"""
    ).fetchall()

    for row in daily:
        key = row["dim_val"]
        if key not in week_rate_by_value:
            # No baseline with enough samples for this value.
            continue
        day_rate = row["approved"] / row["total"] * 100
        week_rate = week_rate_by_value[key]
        delta = week_rate - day_rate
        if delta > APPROVAL_DROP_THRESHOLD:
            agent_field = key if dim_name == "agent" else None
            domain_field = key if dim_name == "domain" else None
            alerts.append({
                "id": f"quality_regression:{dim_name}:{key}",
                "severity": "warning",
                "category": "quality",
                "title": f"{dim_name.title()} '{key}' approval dropped {delta:.0f}pp",
                "detail": (
                    f"24h: {day_rate:.1f}% vs 7d baseline: {week_rate:.1f}% "
                    f"({row['total']} evals in 24h)."
                ),
                "agent": agent_field,
                "domain": domain_field,
                "detected_at": _now_iso(),
                "auto_resolve": True,
            })
|
||||
|
||||
|
||||
# ─── Check: Throughput Anomaly ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def check_throughput(conn: sqlite3.Connection) -> list[dict]:
    """Detect throughput stalling — today's merges vs the average of prior days.

    Merged PRs from the last 7 days are counted per calendar day. Today's count
    is compared with the mean of the other days in the window; an alert is
    raised when today is below THROUGHPUT_DROP_RATIO of that mean.

    Args:
        conn: SQLite connection whose rows can be indexed by column name.

    Returns:
        A list with at most one ``warning`` alert. Empty when fewer than two
        distinct days have merges (not enough data) or throughput is healthy.
    """
    alerts = []

    # Daily merged counts for last 7 days
    rows = conn.execute(
        """SELECT date(merged_at) as day, COUNT(*) as n
           FROM prs WHERE merged_at > datetime('now', '-7 days')
           GROUP BY day ORDER BY day"""
    ).fetchall()

    if len(rows) < 2:
        return alerts  # Not enough data

    # SQLite evaluates 'now' in UTC, so "today" is the current UTC date.
    # NOTE(review): assumes merged_at is stored in UTC — confirm.
    today = datetime.now(timezone.utc).date().isoformat()

    # A day with no merges has no row. Taking the last row as "today" would
    # compare an older day against the baseline and hide a complete stall, so
    # today's count is looked up by date and defaults to zero.
    counts_by_day = {r["day"]: r["n"] for r in rows}
    today_count = counts_by_day.pop(today, 0)
    prior_counts = list(counts_by_day.values())  # non-empty: rows has >= 2 days
    sma = statistics.mean(prior_counts)
    daily_counts = prior_counts + [today_count]

    if sma > 0 and today_count < sma * THROUGHPUT_DROP_RATIO:
        alerts.append({
            "id": "throughput:stalling",
            "severity": "warning",
            "category": "throughput",
            "title": f"Throughput stalling: {today_count} merges today vs {sma:.0f}/day avg",
            "detail": (
                f"Today's merge count ({today_count}) is below {THROUGHPUT_DROP_RATIO:.0%} of "
                f"7-day average ({sma:.1f}/day). Daily counts: {daily_counts}."
            ),
            "agent": None,
            "domain": None,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        })

    return alerts
|
||||
|
||||
|
||||
# ─── Check: Rejection Reason Spike ─────────────────────────────────────────
|
||||
|
||||
|
||||
def check_rejection_spike(conn: sqlite3.Connection) -> list[dict]:
    """Flag rejection tags exceeding REJECTION_SPIKE_RATIO of 24h rejections.

    The denominator is the number of PRs created in the last 24 hours with a
    non-empty ``eval_issues`` array; the numerator is how often each tag occurs
    across those arrays. Nothing is reported when fewer than 10 such PRs exist.

    Returns:
        One ``warning`` alert dict per tag above the threshold.
    """
    # Total rejected PRs in 24h (prs.eval_issues is the canonical source — Epimetheus 2026-04-02)
    rejected_prs = conn.execute(
        """SELECT COUNT(*) as n FROM prs
           WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
             AND created_at > datetime('now', '-24 hours')"""
    ).fetchone()["n"]

    spikes = []
    if rejected_prs < 10:
        return spikes  # Not enough data

    # Count by rejection tag from prs.eval_issues
    tag_rows = conn.execute(
        """SELECT value as tag, COUNT(*) as cnt
           FROM prs, json_each(prs.eval_issues)
           WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
             AND created_at > datetime('now', '-24 hours')
           GROUP BY tag ORDER BY cnt DESC"""
    ).fetchall()

    for row in tag_rows:
        tag = row["tag"]
        hits = row["cnt"]
        share = hits / rejected_prs
        if not share > REJECTION_SPIKE_RATIO:
            continue
        spikes.append({
            "id": f"rejection_spike:{tag}",
            "severity": "warning",
            "category": "quality",
            "title": f"Rejection reason '{tag}' at {share:.0%} of rejections",
            "detail": (
                f"'{tag}' accounts for {hits}/{rejected_prs} rejections in 24h "
                f"({share:.1%}). Threshold: {REJECTION_SPIKE_RATIO:.0%}."
            ),
            "agent": None,
            "domain": None,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        })

    return spikes
|
||||
|
||||
|
||||
# ─── Check: Stuck Loops ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def check_stuck_loops(conn: sqlite3.Connection) -> list[dict]:
    """Report agents that keep hitting the same rejection tag.

    Counts (agent, tag) pairs over PRs created in the last 6 hours and returns
    one ``critical`` alert for every pair seen more than STUCK_LOOP_THRESHOLD
    times.
    """
    # Agent + rejection reason from prs table directly (Epimetheus correction 2026-04-02)
    looping = conn.execute(
        """SELECT agent, value as tag, COUNT(*) as cnt
           FROM prs, json_each(prs.eval_issues)
           WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
             AND agent IS NOT NULL
             AND created_at > datetime('now', '-6 hours')
           GROUP BY agent, tag
           HAVING cnt > ?""",
        (STUCK_LOOP_THRESHOLD,),
    ).fetchall()

    return [
        {
            "id": f"stuck_loop:{row['agent']}:{row['tag']}",
            "severity": "critical",
            "category": "health",
            "title": f"Agent '{row['agent']}' stuck: '{row['tag']}' failed {row['cnt']}x in 6h",
            "detail": (
                f"Agent '{row['agent']}' has been rejected for '{row['tag']}' "
                f"{row['cnt']} times in the last 6 hours (threshold: {STUCK_LOOP_THRESHOLD}). "
                "Stop and reassess."
            ),
            "agent": row["agent"],
            "domain": None,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        }
        for row in looping
    ]
|
||||
|
||||
|
||||
# ─── Check: Cost Spikes ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def check_cost_spikes(conn: sqlite3.Connection) -> list[dict]:
    """Flag agents whose last-day cost exceeds COST_SPIKE_RATIO times their daily average.

    The daily average is the 7-day cost sum divided by 7. Costs come from the
    ``costs`` table when it has both ``agent`` and ``cost_usd`` columns,
    otherwise from per-PR ``prs.cost_usd``.

    Returns:
        One ``warning`` alert dict per spiking agent; an empty list if the
        schema lookup itself raises ``sqlite3.Error``.
    """
    spikes = []

    # Check if costs table exists and has agent column
    try:
        table_info = conn.execute("PRAGMA table_info(costs)").fetchall()
        columns = {col["name"] for col in table_info}
    except sqlite3.Error:
        return spikes

    if {"agent", "cost_usd"} <= columns:
        query = """SELECT agent,
                          SUM(CASE WHEN timestamp > datetime('now', '-1 day') THEN cost_usd ELSE 0 END) as today_cost,
                          SUM(CASE WHEN timestamp > datetime('now', '-7 days') THEN cost_usd ELSE 0 END) / 7.0 as avg_daily
                   FROM costs WHERE agent IS NOT NULL
                   GROUP BY agent
                   HAVING avg_daily > 0"""
    else:
        # Fall back to per-PR cost tracking
        query = """SELECT agent,
                          SUM(CASE WHEN created_at > datetime('now', '-1 day') THEN cost_usd ELSE 0 END) as today_cost,
                          SUM(CASE WHEN created_at > datetime('now', '-7 days') THEN cost_usd ELSE 0 END) / 7.0 as avg_daily
                   FROM prs WHERE agent IS NOT NULL AND cost_usd > 0
                   GROUP BY agent
                   HAVING avg_daily > 0"""

    for row in conn.execute(query).fetchall():
        average = row["avg_daily"]
        if not average:
            continue
        spent = row["today_cost"]
        if not spent > average * COST_SPIKE_RATIO:
            continue
        multiple = spent / average
        spikes.append({
            "id": f"cost_spike:{row['agent']}",
            "severity": "warning",
            "category": "health",
            "title": f"Agent '{row['agent']}' cost spike: ${spent:.2f} today ({multiple:.1f}x avg)",
            "detail": (
                f"Today's cost (${spent:.2f}) is {multiple:.1f}x the 7-day daily average "
                f"(${average:.2f}). Threshold: {COST_SPIKE_RATIO}x."
            ),
            "agent": row["agent"],
            "domain": None,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        })

    return spikes
|
||||
|
||||
|
||||
# ─── Check: Domain Rejection Patterns (Theseus addition) ───────────────────
|
||||
|
||||
|
||||
def check_domain_rejection_patterns(conn: sqlite3.Connection) -> list[dict]:
    """Surface domains whose 24h rejections are dominated by one tag.

    For each domain with at least 5 tag occurrences in the last 24 hours, an
    ``info`` alert is produced when the most frequent tag accounts for more
    than half of them.
    """
    # Per-domain rejection breakdown in 24h from prs table (Epimetheus correction 2026-04-02)
    breakdown = conn.execute(
        """SELECT domain, value as tag, COUNT(*) as cnt
           FROM prs, json_each(prs.eval_issues)
           WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
             AND domain IS NOT NULL
             AND created_at > datetime('now', '-24 hours')
           GROUP BY domain, tag
           ORDER BY domain, cnt DESC"""
    ).fetchall()

    # Bucket rows per domain; the query order keeps each bucket sorted by
    # descending count, so the first entry is the dominant tag.
    per_domain = {}
    for row in breakdown:
        per_domain.setdefault(row["domain"], []).append({"tag": row["tag"], "count": row["cnt"]})

    patterns = []
    # Flag if a domain has >50% of rejections from a single reason (concentrated failure)
    for domain, entries in per_domain.items():
        occurrences = sum(entry["count"] for entry in entries)
        if occurrences < 5:
            continue
        leader = entries[0]
        share = leader["count"] / occurrences
        if not share > 0.5:
            continue
        patterns.append({
            "id": f"domain_rejection_pattern:{domain}:{leader['tag']}",
            "severity": "info",
            "category": "failure_pattern",
            "title": f"Domain '{domain}': {share:.0%} of rejections are '{leader['tag']}'",
            "detail": (
                f"In domain '{domain}', {leader['count']}/{occurrences} rejections (24h) are for "
                f"'{leader['tag']}'. This may indicate a systematic issue with evidence standards "
                "or schema compliance in this domain."
            ),
            "agent": None,
            "domain": domain,
            "detected_at": _now_iso(),
            "auto_resolve": True,
        })

    return patterns
|
||||
|
||||
|
||||
# ─── Failure Report Generator ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def generate_failure_report(conn: sqlite3.Connection, agent: str, hours: int = 24) -> dict | None:
    """Compile a failure report for a specific agent.

    Returns top rejection reasons, example PRs, and suggested fixes.
    Designed to be sent directly to the agent via Pentagon messaging.

    Args:
        conn: SQLite connection whose rows can be indexed by column name.
        agent: Value matched against ``prs.agent``.
        hours: Lookback window in hours; coerced with ``int()``.

    Returns:
        ``None`` when the agent has no rejection tags in the window. Otherwise
        a dict with ``agent``, ``period_hours``, ``total_rejections`` (tag
        occurrences across all reasons), ``top_reasons`` (up to five entries,
        most frequent first) and ``generated_at``.
    """
    hours = int(hours)  # defensive — callers should pass int, but enforce it
    # All reasons are fetched so the total and percentages cover every
    # rejection; limiting in SQL would compute them over the top five only.
    rows = conn.execute(
        """SELECT value as tag, COUNT(*) as cnt,
                  GROUP_CONCAT(DISTINCT number) as pr_numbers
           FROM prs, json_each(prs.eval_issues)
           WHERE eval_issues IS NOT NULL AND eval_issues != '[]'
             AND agent = ?
             AND created_at > datetime('now', ? || ' hours')
           GROUP BY tag ORDER BY cnt DESC""",
        (agent, f"-{hours}"),
    ).fetchall()

    if not rows:
        return None

    total_rejections = sum(r["cnt"] for r in rows)
    top_reasons = [
        {
            "reason": r["tag"],
            "count": r["cnt"],
            "pct": round(r["cnt"] / total_rejections * 100, 1),
            # At most three example PR numbers per reason.
            "example_prs": r["pr_numbers"].split(",")[:3] if r["pr_numbers"] else [],
            "suggestion": _suggest_fix(r["tag"]),
        }
        for r in rows[:5]
    ]

    return {
        "agent": agent,
        "period_hours": hours,
        "total_rejections": total_rejections,
        "top_reasons": top_reasons,
        "generated_at": _now_iso(),
    }
|
||||
|
||||
|
||||
def _suggest_fix(rejection_tag: str) -> str:
|
||||
"""Map known rejection reasons to actionable suggestions."""
|
||||
suggestions = {
|
||||
"broken_wiki_links": "Check that all [[wiki links]] in claims resolve to existing files. Run link validation before submitting.",
|
||||
"near_duplicate": "Search existing claims before creating new ones. Use semantic search to find similar claims.",
|
||||
"frontmatter_schema": "Validate YAML frontmatter against the claim schema. Required fields: title, domain, confidence, type.",
|
||||
"weak_evidence": "Add concrete sources, data points, or citations. Claims need evidence that can be independently verified.",
|
||||
"missing_confidence": "Every claim needs a confidence level: proven, likely, experimental, or speculative.",
|
||||
"domain_mismatch": "Ensure claims are filed under the correct domain. Check domain definitions if unsure.",
|
||||
"too_broad": "Break broad claims into specific, testable sub-claims.",
|
||||
"missing_links": "Claims should link to related claims, entities, or sources. Isolated claims are harder to verify.",
|
||||
}
|
||||
return suggestions.get(rejection_tag, f"Review rejection reason '{rejection_tag}' and adjust extraction accordingly.")
|
||||
|
||||
|
||||
# ─── Run All Checks ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def run_all_checks(conn: sqlite3.Connection) -> list[dict]:
    """Run every check against ``conn`` and return their alerts as one list.

    Checks run in a fixed order and the combined list preserves that order.
    """
    checks = (
        check_agent_health,
        check_quality_regression,
        check_throughput,
        check_rejection_spike,
        check_stuck_loops,
        check_cost_spikes,
        check_domain_rejection_patterns,
    )
    combined = []
    for check in checks:
        combined.extend(check(conn))
    return combined
|
||||
|
||||
|
||||
def format_alert_message(alert: dict) -> str:
    """Render an alert as two lines for Pentagon messaging.

    The first line is a severity marker in brackets followed by the title; the
    second line is the detail text. Unrecognised severities use ``?``.
    """
    markers = {"critical": "!!", "warning": "!", "info": "~"}
    marker = markers.get(alert["severity"], "?")
    headline = f"[{marker}] {alert['title']}"
    return f"{headline}\n{alert['detail']}"
|
||||
132
diagnostics/alerting_routes.py
Normal file
132
diagnostics/alerting_routes.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
"""Route handlers for /check and /api/alerts endpoints.
|
||||
|
||||
Import into app.py and register routes in create_app().
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from aiohttp import web
|
||||
from alerting import run_all_checks, generate_failure_report, format_alert_message # requires CWD = deploy dir; switch to relative import if packaged
|
||||
|
||||
logger = logging.getLogger("argus.alerting")
|
||||
|
||||
# In-memory alert store (replaced each /check cycle, persists between requests)
|
||||
_active_alerts: list[dict] = []
|
||||
_last_check: str | None = None
|
||||
|
||||
|
||||
async def handle_check(request):
    """GET /check — run all monitoring checks, update active alerts, return results.

    Designed to be called by systemd timer every 5 minutes.
    Returns JSON summary of all detected issues.

    On success the module-level ``_active_alerts`` and ``_last_check`` are
    replaced. If any check raises, the previous alerts are kept and a 500
    response carrying the error text is returned. The connection obtained from
    ``request.app["_alerting_conn_func"]`` is always closed.
    """
    global _active_alerts, _last_check

    conn = request.app["_alerting_conn_func"]()
    try:
        alerts = run_all_checks(conn)

        # Generate failure reports for agents with stuck loops. Match on the
        # alert id prefix: a substring test would also catch other health
        # alerts whose agent name happens to contain "stuck".
        failure_reports = {}
        stuck_agents = {
            a["agent"]
            for a in alerts
            if a["id"].startswith("stuck_loop:") and a["agent"]
        }
        for agent in stuck_agents:
            report = generate_failure_report(conn, agent)
            if report:
                failure_reports[agent] = report
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Check failed: %s", e)
        return web.json_response({"error": str(e)}, status=500)
    finally:
        conn.close()

    _active_alerts = alerts
    _last_check = datetime.now(timezone.utc).isoformat()

    result = {
        "checked_at": _last_check,
        "alert_count": len(alerts),
        "critical": sum(1 for a in alerts if a["severity"] == "critical"),
        "warning": sum(1 for a in alerts if a["severity"] == "warning"),
        "info": sum(1 for a in alerts if a["severity"] == "info"),
        "alerts": alerts,
        "failure_reports": failure_reports,
    }

    logger.info(
        "Check complete: %d alerts (%d critical, %d warning)",
        len(alerts),
        result["critical"],
        result["warning"],
    )

    return web.json_response(result)
|
||||
|
||||
|
||||
async def handle_api_alerts(request):
    """GET /api/alerts — return the alerts stored by the most recent /check.

    Query params (each optional; an empty value is ignored):
        severity: filter by severity (critical, warning, info)
        category: filter by category (health, quality, throughput, failure_pattern)
        agent: filter by agent name
        domain: filter by domain
    """
    params = request.query
    selected = list(_active_alerts)

    # severity and category are read by direct key lookup.
    for field in ("severity", "category"):
        wanted = params.get(field)
        if wanted:
            selected = [alert for alert in selected if alert[field] == wanted]

    # agent and domain are read with .get().
    for field in ("agent", "domain"):
        wanted = params.get(field)
        if wanted:
            selected = [alert for alert in selected if alert.get(field) == wanted]

    payload = {
        "alerts": selected,
        "total": len(selected),
        "last_check": _last_check,
    }
    return web.json_response(payload)
|
||||
|
||||
|
||||
async def handle_api_failure_report(request):
    """GET /api/failure-report/{agent} — generate failure report for an agent.

    Query params:
        hours: lookback window (default 24). Non-numeric values fall back to
            24; the value is clamped to the range 1..168.

    Returns the report as JSON, or a ``no_rejections`` status object when the
    agent has no rejections in the window.
    """
    agent = request.match_info["agent"]
    try:
        hours = int(request.query.get("hours", "24"))
    except ValueError:
        hours = 24
    # Zero or negative hours would build a modifier such as "--5 hours", which
    # SQLite cannot interpret, and the request would silently report no
    # rejections. Clamp to at least one hour and at most one week.
    hours = max(1, min(hours, 168))

    conn = request.app["_alerting_conn_func"]()
    try:
        report = generate_failure_report(conn, agent, hours)
    finally:
        conn.close()

    if not report:
        return web.json_response({"agent": agent, "status": "no_rejections", "period_hours": hours})

    return web.json_response(report)
|
||||
|
||||
|
||||
def register_alerting_routes(app, get_conn_func):
    """Attach the alerting endpoints to ``app``.

    get_conn_func: callable that returns a read-only sqlite3.Connection. It is
    stored on the app under ``"_alerting_conn_func"``, where the handlers look
    it up.
    """
    app["_alerting_conn_func"] = get_conn_func

    routes = (
        ("/check", handle_check),
        ("/api/alerts", handle_api_alerts),
        ("/api/failure-report/{agent}", handle_api_failure_report),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)
|
||||
1555
diagnostics/app.py
1555
diagnostics/app.py
File diff suppressed because it is too large
Load diff
140
diagnostics/backfill_submitted_by.py
Normal file
140
diagnostics/backfill_submitted_by.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
#!/usr/bin/env python3
|
||||
"""One-time backfill: populate submitted_by on prs table from source archive files.
|
||||
|
||||
Matches PRs to sources via branch name slug → source filename.
|
||||
Reads proposed_by and intake_tier from source frontmatter.
|
||||
|
||||
Run: python3 backfill_submitted_by.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = os.environ.get("DB_PATH", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
ARCHIVE_DIR = Path(os.environ.get("ARCHIVE_DIR", "/opt/teleo-eval/workspaces/main/inbox/archive"))
|
||||
|
||||
|
||||
def parse_frontmatter(path: Path) -> dict:
    """Read flat ``key: value`` pairs from a markdown file's frontmatter.

    The frontmatter is the text between a leading ``---`` and the next
    ``---``. Each line is split at its first colon; surrounding whitespace and
    quotes are removed from the value, and an empty value or ``null`` (any
    case) becomes ``None``. Lines without a colon are ignored.

    Returns:
        The parsed mapping, or ``{}`` when the file has no leading ``---`` or
        no closing ``---``.
    """
    raw = path.read_text(encoding="utf-8", errors="replace")
    if not raw.startswith("---"):
        return {}
    closing = raw.find("---", 3)
    if closing == -1:
        return {}

    fields = {}
    for entry in raw[3:closing].strip().split("\n"):
        name, colon, value = entry.strip().partition(":")
        if not colon:
            # Blank line, or a line that is not a key/value pair.
            continue
        value = value.strip().strip('"').strip("'")
        if value == "" or value.lower() == "null":
            value = None
        fields[name.strip()] = value
    return fields
|
||||
|
||||
|
||||
def slug_from_branch(branch: str) -> str:
    """Turn a branch name like 'extract/2026-04-06-slug-hash' into its source slug.

    Everything up to and including the first ``/`` is dropped, then a trailing
    ``-`` plus four lowercase hex characters (e.g. ``-3e68``) is removed.
    """
    _prefix, slash, remainder = branch.partition("/")
    slug = remainder if slash else branch
    # Strip trailing hex hash (e.g., -3e68, -a6af)
    return re.sub(r"-[0-9a-f]{4}$", "", slug)
|
||||
|
||||
|
||||
def main():
    """Backfill ``prs.submitted_by`` for every PR that has a branch but no submitter.

    Source files under ARCHIVE_DIR are indexed by filename stem. Each PR's
    branch is reduced to a slug and matched against that index, first exactly
    and then by substring in either direction. When a source with frontmatter
    is found, the submitter comes from ``proposed_by`` / ``intake_tier`` and
    ``source_path`` is set as well; otherwise the submitter is derived from the
    branch prefix. All updates are committed together at the end, and the
    connection is closed even if the run fails.
    """
    # Branch prefix → agent label used for self-directed research PRs.
    agent_map = {
        "extract": "pipeline", "ingestion": "pipeline",
        "rio": "rio", "theseus": "theseus", "vida": "vida",
        "clay": "clay", "astra": "astra", "leo": "leo",
        "reweave": "pipeline",
    }
    agent_prefixes = ("rio/", "theseus/", "vida/", "clay/", "astra/", "leo/")

    conn = sqlite3.connect(DB_PATH, timeout=30)
    try:
        conn.row_factory = sqlite3.Row

        # Build source index: filename stem → frontmatter
        source_index = {}
        if ARCHIVE_DIR.exists():
            for f in ARCHIVE_DIR.glob("*.md"):
                source_index[f.stem] = parse_frontmatter(f)
        print(f"Indexed {len(source_index)} source files from {ARCHIVE_DIR}")

        # Get all PRs without submitted_by
        prs = conn.execute(
            "SELECT number, branch FROM prs WHERE submitted_by IS NULL AND branch IS NOT NULL"
        ).fetchall()
        print(f"Found {len(prs)} PRs without submitted_by")

        updated = 0
        for pr in prs:
            branch = pr["branch"]
            slug = slug_from_branch(branch)

            # Try to match slug to a source file. Track the stem that actually
            # matched, so source_path points at the real file after a partial
            # match rather than at a name built from the branch slug.
            matched_stem = slug
            fm = source_index.get(slug)
            if not fm:
                # Try partial matching: slug might be a substring of the source filename
                for stem, sfm in source_index.items():
                    if slug in stem or stem in slug:
                        fm = sfm
                        matched_stem = stem
                        break

            if fm:
                proposed_by = fm.get("proposed_by")
                intake_tier = fm.get("intake_tier")

                if proposed_by:
                    contributor = proposed_by.strip().strip('"').strip("'")
                elif intake_tier == "research-task":
                    # Derive agent from branch prefix
                    prefix = branch.split("/", 1)[0] if "/" in branch else "unknown"
                    agent = agent_map.get(prefix, prefix)
                    contributor = f"{agent} (self-directed)"
                else:
                    # "directed" tier, or a source with no proposed_by: it was
                    # Cory's submission.
                    contributor = "@m3taversal"

                if contributor:
                    conn.execute(
                        "UPDATE prs SET submitted_by = ?, source_path = ? WHERE number = ?",
                        (contributor, f"inbox/archive/{matched_stem}.md", pr["number"]),
                    )
                    updated += 1
            else:
                if branch.startswith(agent_prefixes):
                    # Agent-named branches from overnight research sessions
                    contributor = f"{branch.split('/', 1)[0]} (self-directed)"
                elif branch.startswith("reweave/"):
                    contributor = "pipeline (reweave)"
                else:
                    # Everything else (extract/, ingestion/, unknown) → Cory directed it
                    contributor = "@m3taversal"
                conn.execute(
                    "UPDATE prs SET submitted_by = ? WHERE number = ?",
                    (contributor, pr["number"]),
                )
                updated += 1

        conn.commit()
    finally:
        conn.close()

    print(f"Updated {updated}/{len(prs)} PRs with submitted_by")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
161
diagnostics/claims_api.py
Normal file
161
diagnostics/claims_api.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Claims API endpoint — serves claim data from the codex filesystem."""
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from aiohttp import web
|
||||
|
||||
CODEX_ROOT = Path("/opt/teleo-eval/workspaces/main/domains")
|
||||
_cache = {"data": None, "ts": 0}
|
||||
CACHE_TTL = 300 # 5 minutes
|
||||
|
||||
def _parse_frontmatter(filepath):
|
||||
try:
|
||||
text = filepath.read_text(encoding="utf-8")
|
||||
if not text.startswith("---"):
|
||||
return None
|
||||
end = text.index("---", 3)
|
||||
fm = yaml.safe_load(text[3:end])
|
||||
if not fm or fm.get("type") != "claim":
|
||||
return None
|
||||
body = text[end+3:].strip()
|
||||
# Count wiki-links
|
||||
links = re.findall(r"\[\[([^\]]+)\]\]", body)
|
||||
# Extract first paragraph as summary
|
||||
paragraphs = [p.strip() for p in body.split("\n\n") if p.strip() and not p.strip().startswith("#")]
|
||||
summary = paragraphs[0][:300] if paragraphs else ""
|
||||
return {
|
||||
"slug": filepath.stem,
|
||||
"title": fm.get("title", filepath.stem.replace("-", " ")),
|
||||
"domain": fm.get("domain", "unknown"),
|
||||
"confidence": fm.get("confidence", "unknown"),
|
||||
"agent": fm.get("agent"),
|
||||
"scope": fm.get("scope"),
|
||||
"created": str(fm.get("created", "")),
|
||||
"source": fm.get("source", "") if isinstance(fm.get("source"), str) else "",
|
||||
"sourcer": fm.get("sourcer", ""),
|
||||
"wiki_link_count": len(links),
|
||||
"summary": summary,
|
||||
"challenged_by": fm.get("challenged_by"),
|
||||
"related_claims": fm.get("related_claims", []),
|
||||
}
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _load_all_claims():
    """Return all parsed claims under CODEX_ROOT, cached for CACHE_TTL seconds.

    Each immediate subdirectory of CODEX_ROOT is scanned for ``*.md`` files
    (``_map.md`` excluded); files that ``_parse_frontmatter`` rejects are
    skipped. The returned list is the cached object itself, so callers must
    not mutate it.
    """
    now = time.time()
    # Compare against None: an empty claim list is a valid cached result and
    # must not trigger a filesystem rescan on every request.
    if _cache["data"] is not None and now - _cache["ts"] < CACHE_TTL:
        return _cache["data"]

    claims = []
    for domain_dir in sorted(CODEX_ROOT.iterdir()):
        if not domain_dir.is_dir():
            continue
        for f in sorted(domain_dir.glob("*.md")):
            if f.name == "_map.md":
                continue
            c = _parse_frontmatter(f)
            if c:
                claims.append(c)

    _cache["data"] = claims
    _cache["ts"] = now
    return claims
|
||||
|
||||
|
||||
async def handle_claims(request):
    """GET /api/claims — list claims with filtering, sorting and pagination.

    Query params:
        domain, confidence, agent: exact-match filters.
        q: case-insensitive substring searched in title and summary.
        sort: ``recent`` (default), ``alpha`` or ``domain``; any other value
            leaves the order unchanged.
        limit: page size, default 50, clamped to 0..200.
        offset: start index, default 0, negative values treated as 0.
    Non-numeric ``limit``/``offset`` fall back to their defaults.

    The response also carries sidebar data computed over all claims: counts
    per domain, the distinct confidence levels and the distinct agents.
    """
    claims = _load_all_claims()
    query = request.query

    # Filters
    domain = query.get("domain")
    search = query.get("q", "").lower()
    confidence = query.get("confidence")
    agent = query.get("agent")
    sort = query.get("sort", "recent")  # recent, alpha, domain

    filtered = claims
    if domain:
        filtered = [c for c in filtered if c["domain"] == domain]
    if confidence:
        filtered = [c for c in filtered if c["confidence"] == confidence]
    if agent:
        filtered = [c for c in filtered if c["agent"] == agent]
    if search:
        filtered = [c for c in filtered if search in c["title"].lower() or search in c["summary"].lower()]

    # Sort with sorted() rather than list.sort(): with no filter applied,
    # `filtered` is the shared cached list and must not be reordered in place.
    if sort == "recent":
        filtered = sorted(filtered, key=lambda c: c["created"], reverse=True)
    elif sort == "alpha":
        filtered = sorted(filtered, key=lambda c: c["title"].lower())
    elif sort == "domain":
        filtered = sorted(filtered, key=lambda c: (c["domain"], c["title"].lower()))

    # Pagination — a malformed number falls back to the default instead of
    # surfacing as a server error.
    try:
        limit = int(query.get("limit", "50"))
    except ValueError:
        limit = 50
    try:
        offset = int(query.get("offset", "0"))
    except ValueError:
        offset = 0
    limit = max(0, min(limit, 200))
    offset = max(0, offset)
    page = filtered[offset:offset + limit]

    # Domain counts for sidebar
    domain_counts = {}
    for c in claims:
        domain_counts[c["domain"]] = domain_counts.get(c["domain"], 0) + 1

    return web.json_response({
        "claims": page,
        "total": len(filtered),
        "offset": offset,
        "limit": limit,
        "domains": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
        "confidence_levels": sorted(set(c["confidence"] for c in claims)),
        "agents": sorted(set(c["agent"] for c in claims if c["agent"])),
    }, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
async def handle_claim_detail(request):
    """GET /api/claims/{slug} — return one claim, including its full body.

    The slug must match a claim in the loaded claim list; otherwise a 404 JSON
    error is returned. The body is read from the first ``<slug>.md`` found in
    a subdirectory of CODEX_ROOT and is omitted if that file no longer has a
    closing ``---`` delimiter.
    """
    slug = request.match_info["slug"]
    cors = {"Access-Control-Allow-Origin": "*"}

    for cached in _load_all_claims():
        if cached["slug"] != slug:
            continue

        # Work on a copy: adding "body" to the cached dict would leak full
        # bodies into every later /api/claims list response.
        detail = dict(cached)

        # Read full body for detail view
        for domain_dir in CODEX_ROOT.iterdir():
            if not domain_dir.is_dir():
                continue
            f = domain_dir / f"{slug}.md"
            if f.exists():
                text = f.read_text(encoding="utf-8")
                end = text.find("---", 3)
                if end != -1:
                    detail["body"] = text[end + 3:].strip()
                break
        return web.json_response(detail, headers=cors)

    # Same CORS header as the success path so cross-origin clients can read
    # the error.
    return web.json_response({"error": "claim not found"}, status=404, headers=cors)
|
||||
|
||||
|
||||
async def handle_domains(request):
    """Summarise all claims per domain.

    Each entry of the JSON list holds the domain ``name``, its claim
    ``count``, the sorted ``agents`` seen in it and a ``confidence_dist``
    histogram. Entries are ordered by descending claim count.
    """
    by_domain = {}
    for claim in _load_all_claims():
        name = claim["domain"]
        entry = by_domain.setdefault(
            name,
            {"name": name, "count": 0, "agents": set(), "confidence_dist": {}},
        )
        entry["count"] += 1
        if claim["agent"]:
            entry["agents"].add(claim["agent"])
        level = claim["confidence"]
        histogram = entry["confidence_dist"]
        histogram[level] = histogram.get(level, 0) + 1

    ordered = sorted(by_domain.values(), key=lambda item: -item["count"])
    for entry in ordered:
        # Sets cannot be JSON-encoded; emit a sorted list instead.
        entry["agents"] = sorted(entry["agents"])

    return web.json_response(ordered, headers={"Access-Control-Allow-Origin": "*"})
|
||||
|
||||
|
||||
def register_claims_routes(app):
    """Attach the claims and domains GET endpoints to ``app``'s router."""
    routes = (
        ("/api/claims", handle_claims),
        ("/api/claims/{slug}", handle_claim_detail),
        ("/api/domains", handle_domains),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)
|
||||
365
diagnostics/contributor_profile_api.py
Normal file
365
diagnostics/contributor_profile_api.py
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
"""Contributor profile API — GET /api/contributors/{handle}"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
# Location of the pipeline SQLite database; overridable via PIPELINE_DB.
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")

# Handles (lower-case) that are excluded from the contributors list.
SYSTEM_ACCOUNTS = {"pipeline", "unknown", "teleo-agents", "teleo pipeline"}

# Git checkout used as working directory for the `git log` fallbacks.
CODEX_PATH = "/opt/teleo-eval/workspaces/main"

# Per-role multipliers applied to the `<role>_count` columns by _compute_ci.
CI_WEIGHTS = {
    "sourcer": 0.15,
    "extractor": 0.05,
    "challenger": 0.35,
    "synthesizer": 0.25,
    "reviewer": 0.20,
}

# Contributions dated on or before this day earn FOUNDING CONTRIBUTOR.
FOUNDING_CUTOFF = "2026-03-15"

# Badge catalogue: name -> rarity tier and human-readable description.
BADGE_DEFS = {
    "FOUNDING CONTRIBUTOR": {"rarity": "limited", "desc": "Contributed during pre-launch phase"},
    "BELIEF MOVER": {"rarity": "rare", "desc": "Challenge that led to a claim revision"},
    "KNOWLEDGE SOURCER": {"rarity": "uncommon", "desc": "Source that generated 3+ claims"},
    "DOMAIN SPECIALIST": {"rarity": "rare", "desc": "Top 3 CI contributor in a domain"},
    "VETERAN": {"rarity": "uncommon", "desc": "10+ accepted contributions"},
    "FIRST BLOOD": {"rarity": "common", "desc": "First contribution of any kind"},
    "CONTRIBUTOR": {"rarity": "common", "desc": "Account created + first accepted contribution"},
}
|
||||
|
||||
|
||||
def _get_conn():
    """Open a connection to ``DB_PATH`` whose rows support access by column name."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def _compute_ci(row):
    """Return the role-weighted CI score of ``row``, rounded to 2 decimals.

    ``row`` is a mapping with optional ``<role>_count`` entries; missing or
    falsy counts contribute nothing.
    """
    score = 0
    for role_name, multiplier in CI_WEIGHTS.items():
        count = row.get(f"{role_name}_count", 0) or 0
        score += count * multiplier
    return round(score, 2)
|
||||
|
||||
|
||||
def _compute_badges(handle, row, domain_breakdown, conn):
    """Return the list of badge names earned by a contributor.

    Args:
        handle: contributor handle (not used by the current rules).
        row: mapping with the contributor's counters; reads
            ``first_contribution``, ``claims_merged``, ``challenger_count``,
            ``sourcer_count`` and the optional
            ``_challenge_count_from_scores``.
        domain_breakdown: not used by the current rules.
        conn: not used by the current rules.

    Returns:
        Badge names in award order; each is a key of ``BADGE_DEFS``.
    """
    badges = []

    first = row.get("first_contribution") or ""
    # Compare the date part only: a full timestamp such as
    # "2026-03-15T09:00:00" sorts after the bare cutoff date and would
    # otherwise miss the badge on the cutoff day itself.
    if first and str(first)[:10] <= FOUNDING_CUTOFF:
        badges.append("FOUNDING CONTRIBUTOR")

    claims = row.get("claims_merged", 0) or 0
    if claims > 0:
        badges.append("CONTRIBUTOR")
        badges.append("FIRST BLOOD")
    if claims >= 10:
        badges.append("VETERAN")

    challenger = row.get("challenger_count", 0) or 0
    # `or 0` guards against an explicit None, which cannot be compared to 0.
    challenge_ci = row.get("_challenge_count_from_scores", 0) or 0
    if challenger > 0 or challenge_ci > 0:
        badges.append("BELIEF MOVER")

    sourcer = row.get("sourcer_count", 0) or 0
    if sourcer >= 3:
        badges.append("KNOWLEDGE SOURCER")

    return badges
|
||||
|
||||
|
||||
def _get_domain_breakdown(handle, conn):
    """Count merged PRs per domain for ``handle``.

    A PR counts when ``handle`` equals its ``agent`` or ``submitted_by``
    (case-insensitive). Returns ``{domain: count}``, largest count first.
    """
    query = """
        SELECT domain, COUNT(*) as cnt
        FROM prs
        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
        AND domain IS NOT NULL
        GROUP BY domain ORDER BY cnt DESC
    """
    breakdown = {}
    for record in conn.execute(query, (handle, handle)).fetchall():
        breakdown[record["domain"]] = record["cnt"]
    return breakdown
|
||||
|
||||
|
||||
def _get_contribution_timeline(handle, conn, limit=20):
    """Return the most recent merged PRs of ``handle`` as timeline entries.

    Rows are matched on ``agent`` or ``submitted_by`` (case-insensitive) and
    ordered newest first, capped at ``limit``. When a PR has no description,
    the summary is derived from the source file name.
    """
    query = """
        SELECT number, domain, status, created_at, description, commit_type, source_path
        FROM prs
        WHERE status='merged' AND (LOWER(agent)=LOWER(?) OR LOWER(submitted_by)=LOWER(?))
        ORDER BY created_at DESC LIMIT ?
    """
    entries = []
    for pr in conn.execute(query, (handle, handle, limit)).fetchall():
        summary = pr["description"] or ""
        if not summary and pr["source_path"]:
            file_name = os.path.basename(pr["source_path"])
            summary = file_name.replace("-", " ").replace(".md", "")
        created = pr["created_at"]
        entries.append({
            "pr_number": pr["number"],
            "domain": pr["domain"],
            "date": created[:10] if created else None,
            "type": _classify_commit(pr["commit_type"]),
            "summary": summary[:200] if summary else None,
        })
    return entries
|
||||
|
||||
|
||||
def _classify_commit(commit_type):
    """Map a raw commit type onto ``"challenge"``, ``"enrich"`` or ``"create"``.

    Matching is case-insensitive and by substring; "challenge" wins over the
    enrich markers, and anything else (including empty input) is "create".
    """
    lowered = (commit_type or "").lower()
    if "challenge" in lowered:
        return "challenge"
    if any(marker in lowered for marker in ("enrich", "update", "reweave")):
        return "enrich"
    return "create"
|
||||
|
||||
|
||||
def _get_review_stats(handle, conn):
    """Return ``{outcome: count}`` over ``review_records`` for ``handle``.

    The agent column is compared case-insensitively.
    """
    query = """
        SELECT outcome, COUNT(*) as cnt
        FROM review_records
        WHERE LOWER(agent) = LOWER(?)
        GROUP BY outcome
    """
    records = conn.execute(query, (handle,)).fetchall()
    return {record["outcome"]: record["cnt"] for record in records}
|
||||
|
||||
|
||||
def _get_action_ci(handle, conn):
    """Get action-type CI from the contribution_scores table.

    Checks both the exact handle and a variant with a trailing
    ``-suffix123`` / ``_suffix123`` stripped (all lower-cased).

    Args:
        handle: contributor handle.
        conn: sqlite3 connection whose rows support access by column name.

    Returns:
        ``{"total": float, "breakdown": {event_type: {"count", "ci"}}}``, or
        ``None`` when the query fails (e.g. the table does not exist) or no
        rows match.
    """
    h = handle.lower()
    base = re.sub(r"[-_]\w+\d+$", "", h)
    variants = [h, base] if base and base != h else [h]
    placeholders = ",".join("?" for _ in variants)

    try:
        rows = conn.execute(f"""
            SELECT event_type, SUM(ci_earned) as total, COUNT(*) as cnt
            FROM contribution_scores
            WHERE LOWER(contributor) IN ({placeholders})
            GROUP BY event_type
        """, variants).fetchall()
    except sqlite3.Error:
        # Best effort: a missing table or similar simply means "no action CI".
        return None

    if not rows:
        return None

    breakdown = {}
    total = 0.0
    for r in rows:
        # SUM() yields NULL when every ci_earned in the group is NULL.
        earned = r["total"] or 0.0
        breakdown[r["event_type"]] = {
            "count": r["cnt"],
            "ci": round(earned, 4),
        }
        total += earned

    return {
        "total": round(total, 4),
        "breakdown": breakdown,
    }
|
||||
|
||||
|
||||
def _get_git_contributor(handle):
    """Fallback: build a contributor record from git history.

    Scans commits that added files under ``domains/`` (all refs) in
    ``CODEX_PATH`` and keeps those whose author name or e-mail contains
    ``handle`` (case-insensitive). Returns ``None`` when git fails, nothing
    matches, or any error occurs; otherwise a dict shaped like a
    ``contributors`` row with all role counters set to 0.
    """
    try:
        proc = subprocess.run(
            ["git", "log", "--all", "--format=%H|%an|%ae|%aI", "--diff-filter=A", "--", "domains/"],
            capture_output=True, text=True, cwd=CODEX_PATH, timeout=30
        )
        if proc.returncode != 0:
            return None

        needle = handle.lower()
        matches = []
        for entry in proc.stdout.strip().split("\n"):
            fields = entry.split("|", 3) if entry else []
            if len(fields) < 4:
                continue
            sha, name, email, date = fields
            if needle in name.lower() or needle in email.lower():
                matches.append({"sha": sha, "author": name, "email": email, "date": date[:10]})

        if not matches:
            return None

        # git log lists newest commits first, so the first match supplies
        # the display identity.
        newest = matches[0]
        dates = [m["date"] for m in matches]
        return {
            "handle": handle,
            "display_name": newest["author"],
            "email": newest["email"],
            "first_contribution": min(dates),
            "last_contribution": max(dates),
            "claims_merged": len(matches),
            "sourcer_count": 0,
            "extractor_count": 0,
            "challenger_count": 0,
            "synthesizer_count": 0,
            "reviewer_count": 0,
        }
    except Exception:
        return None
|
||||
|
||||
|
||||
def get_contributor_profile(handle):
    """Assemble the full profile payload for one contributor.

    The contributor is read from the ``contributors`` table
    (case-insensitive handle match) or, failing that, reconstructed from git
    history. Returns ``None`` when neither source knows the handle;
    otherwise a JSON-serialisable dict with scores, badges, role and domain
    breakdowns, review stats and a contribution timeline.
    """
    conn = _get_conn()
    try:
        row = conn.execute(
            "SELECT * FROM contributors WHERE LOWER(handle) = LOWER(?)", (handle,)
        ).fetchone()
        data = dict(row) if row else _get_git_contributor(handle)
        if not data:
            return None

        ci_score = _compute_ci(data)
        action_ci = _get_action_ci(handle, conn)
        domain_breakdown = _get_domain_breakdown(handle, conn)
        timeline = _get_contribution_timeline(handle, conn)
        review_stats = _get_review_stats(handle, conn)

        # Expose challenge events from contribution_scores to the badge rules.
        if action_ci and "challenge" in action_ci.get("breakdown", {}):
            data["_challenge_count_from_scores"] = action_ci["breakdown"]["challenge"]["count"]
        badges = _compute_badges(handle, data, domain_breakdown, conn)

        # For git-only contributors, build domain breakdown from git
        if not domain_breakdown and not row:
            domain_breakdown = _git_domain_breakdown(handle)

        # Hero badge: the rarest earned badge; ties keep award order.
        rarity_rank = {"limited": 0, "rare": 1, "uncommon": 2, "common": 3}
        ranked = [b for b in badges if BADGE_DEFS.get(b, {}).get("rarity") in rarity_rank]
        hero_badge = min(
            ranked,
            key=lambda b: rarity_rank[BADGE_DEFS[b]["rarity"]],
            default=None,
        )

        role_breakdown = {
            role: data.get(f"{role}_count", 0) or 0
            for role in ("sourcer", "extractor", "challenger", "synthesizer", "reviewer")
        }
        total_roles = sum(role_breakdown.values())
        role_pct = {
            role: round(count / total_roles * 100) if total_roles > 0 else 0
            for role, count in role_breakdown.items()
        }

        return {
            "handle": data.get("handle", handle),
            "display_name": data.get("display_name"),
            "ci_score": ci_score,
            "action_ci": action_ci,
            "primary_ci": action_ci["total"] if action_ci else ci_score,
            "hero_badge": hero_badge,
            "badges": [{"name": b, **BADGE_DEFS.get(b, {})} for b in badges],
            "joined": data.get("first_contribution"),
            "last_active": data.get("last_contribution"),
            "claims_merged": data.get("claims_merged", 0) or 0,
            "principal": data.get("principal"),
            "role_breakdown": role_breakdown,
            "role_percentages": role_pct,
            "domain_breakdown": domain_breakdown,
            "review_stats": review_stats,
            "contribution_timeline": timeline,
            "active_domains": list(domain_breakdown.keys()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _git_domain_breakdown(handle):
    """For git-only contributors, count claims by domain from file paths.

    Walks ``git log --name-only`` output for commits that added files under
    ``domains/`` and, for commits whose author name contains ``handle``
    (case-insensitive), counts each added ``domains/<domain>/...`` path.

    Returns:
        ``{domain: count}``; ``{}`` when git fails or any error occurs.
    """
    try:
        result = subprocess.run(
            ["git", "log", "--all", "--name-only", "--format=COMMIT|%an", "--diff-filter=A", "--", "domains/"],
            capture_output=True, text=True, cwd=CODEX_PATH, timeout=30
        )
        if result.returncode != 0:
            return {}

        needle = handle.lower()
        domains = {}
        current_match = False
        for line in result.stdout.strip().split("\n"):
            if line.startswith("COMMIT|"):
                author = line.split("|", 1)[1]
                current_match = needle in author.lower()
            elif current_match and line.startswith("domains/"):
                parts = line.split("/")
                # Require domains/<domain>/<file>: a file sitting directly in
                # domains/ has no domain and must not be counted as one.
                if len(parts) >= 3:
                    domain = parts[1]
                    domains[domain] = domains.get(domain, 0) + 1

        return domains
    except Exception:
        # Best effort: the breakdown is optional decoration on the profile.
        return {}
|
||||
|
||||
|
||||
async def handle_contributor_profile(request):
    """GET /api/contributors/{handle} — JSON profile or a 404 error payload."""
    import asyncio

    from aiohttp import web

    handle = request.match_info["handle"]
    # get_contributor_profile does blocking SQLite work and may shell out to
    # `git log` (30 s timeout); run it in the default executor so the event
    # loop keeps serving other requests. The connection is opened and closed
    # inside that call, so it never crosses threads.
    loop = asyncio.get_running_loop()
    profile = await loop.run_in_executor(None, get_contributor_profile, handle)
    if profile is None:
        return web.json_response({"error": f"Contributor '{handle}' not found"}, status=404)
    return web.json_response(profile)
|
||||
|
||||
|
||||
async def handle_contributors_list(request):
    """GET /api/contributors/list — contributors ordered by merged claims.

    Query params:
        min_claims: minimum ``claims_merged`` to include (default 1; a
            non-integer value falls back to the default instead of
            producing a 500).

    Handles listed in ``SYSTEM_ACCOUNTS`` are omitted. ``primary_ci`` is the
    action CI total when it is positive, otherwise the role-weighted score.
    """
    from aiohttp import web

    try:
        min_claims = int(request.query.get("min_claims", "1"))
    except (TypeError, ValueError):
        min_claims = 1

    conn = _get_conn()
    try:
        rows = conn.execute("""
            SELECT handle, display_name, first_contribution, last_contribution,
                   sourcer_count, extractor_count, challenger_count, synthesizer_count,
                   reviewer_count, claims_merged, principal
            FROM contributors
            WHERE claims_merged >= ?
            ORDER BY claims_merged DESC
        """, (min_claims,)).fetchall()

        contributors = []
        for r in rows:
            data = dict(r)
            if data["handle"].lower() in SYSTEM_ACCOUNTS:
                continue
            ci = _compute_ci(data)
            action_ci = _get_action_ci(data["handle"], conn)
            action_total = action_ci["total"] if action_ci else 0.0
            contributors.append({
                "handle": data["handle"],
                "display_name": data["display_name"],
                "ci_score": ci,
                "action_ci": action_total,
                "primary_ci": action_total if action_total > 0 else ci,
                "claims_merged": data["claims_merged"],
                "first_contribution": data["first_contribution"],
                "last_contribution": data["last_contribution"],
                "principal": data["principal"],
            })

        return web.json_response({
            "contributors": contributors,
            "total": len(contributors),
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
def register_contributor_routes(app):
    """Attach the contributor endpoints to ``app``'s router.

    The fixed ``/list`` path is added before the ``{handle}`` pattern,
    preserving the original registration order.
    """
    router = app.router
    for path, handler in (
        ("/api/contributors/list", handle_contributors_list),
        ("/api/contributors/{handle}", handle_contributor_profile),
    ):
        router.add_get(path, handler)
|
||||
312
diagnostics/daily_digest.py
Normal file
312
diagnostics/daily_digest.py
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
"""Daily digest: aggregates 24h activity for Telegram bot consumption.
|
||||
|
||||
Data sources:
|
||||
- pipeline.db: merged PRs, audit events, contributor activity
|
||||
- Forgejo API: PR descriptions for claim summaries
|
||||
- claim-index: total claims, domain breakdown
|
||||
- review queue: pending approval counts
|
||||
|
||||
Endpoint: GET /api/daily-digest?hours=24
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
# Module logger for the digest.
logger = logging.getLogger("argus.daily_digest")

# Forgejo REST API root and the repository whose PRs are inspected.
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
REPO = "teleo/teleo-codex"

# Local endpoint that serves the claim-index summary.
CLAIM_INDEX_URL = "http://localhost:8080/claim-index"
|
||||
|
||||
|
||||
async def fetch_daily_digest(
    db_path: str,
    forgejo_token: str | None = None,
    hours: int = 24,
    timeout_s: int = 15,
    verify_ssl: bool = False,
) -> dict[str, Any]:
    """Build the daily digest payload.

    Returns structured data for Epimetheus's Telegram bot to format and send.

    Args:
        db_path: path to pipeline.db.
        forgejo_token: optional Forgejo API token, sent as an
            ``Authorization: token ...`` header.
        hours: lookback window in hours.
        timeout_s: total timeout applied to each HTTP request.
        verify_ssl: verify TLS certificates on outgoing requests. Defaults to
            ``False`` to keep the previous behaviour.
            NOTE(review): with verification off the Forgejo token travels
            over an unauthenticated TLS channel; enable this once the
            server certificate validates.

    Returns:
        Dict with ``period_hours``, ``generated_at``, ``claims_merged``,
        ``pipeline_stats``, ``agent_activity``, ``pending_review`` and
        ``knowledge_base``.
    """
    cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()

    # DB queries run synchronously first; only the HTTP fetches are parallel.
    db_data = _query_db(db_path, cutoff, hours)

    headers = {"Accept": "application/json"}
    if forgejo_token:
        headers["Authorization"] = f"token {forgejo_token}"

    if verify_ssl:
        connector = aiohttp.TCPConnector()
    else:
        connector = aiohttp.TCPConnector(ssl=False)

    async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
        # Fetch claim-index, merged PR details from Forgejo, and open PR count in parallel
        merged_numbers = [pr["number"] for pr in db_data["merged_prs"]]
        claim_index, pr_details, open_pr_count = await asyncio.gather(
            _fetch_claim_index(session, timeout_s),
            _fetch_merged_pr_details(session, merged_numbers, timeout_s),
            _fetch_open_pr_count(session, timeout_s),
        )

    # Enrich merged PRs with Forgejo descriptions
    merged_claims = _build_merged_claims(db_data["merged_prs"], pr_details)

    return {
        "period_hours": hours,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "claims_merged": merged_claims,
        "pipeline_stats": {
            "prs_merged": db_data["prs_merged"],
            "prs_opened": db_data["prs_opened"],
            "prs_rejected": db_data["prs_rejected"],
            "approval_rate": db_data["approval_rate"],
            "top_rejection_reasons": db_data["top_rejection_reasons"],
        },
        "agent_activity": db_data["agent_activity"],
        "pending_review": {
            "open_prs": open_pr_count,
        },
        "knowledge_base": {
            "total_claims": claim_index.get("total_claims", 0),
            "domains": claim_index.get("domains", {}),
            "orphan_ratio": claim_index.get("orphan_ratio", 0),
            "cross_domain_links": claim_index.get("cross_domain_links", 0),
        },
    }
|
||||
|
||||
|
||||
def _query_db(db_path: str, cutoff: str, hours: int) -> dict[str, Any]:
    """Run all DB queries synchronously (SQLite is fast enough for digest).

    Args:
        db_path: path to pipeline.db; opened read-only.
        cutoff: lower bound compared as a string against ``merged_at``,
            ``created_at`` and ``timestamp``.
            NOTE(review): assumes stored timestamps sort lexicographically
            against this value — confirm the DB's timestamp format.
        hours: lookback window; currently unused by the queries.

    Returns:
        Dict with ``merged_prs`` (list of row dicts), ``prs_merged``,
        ``prs_opened``, ``prs_rejected``, ``approval_rate`` (percent, one
        decimal), ``top_rejection_reasons`` (up to five) and
        ``agent_activity``.
    """
    import json

    conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        # Merged PRs in period
        merged_prs = conn.execute(
            """SELECT number, branch, domain, agent, commit_type, merged_at, cost_usd
               FROM prs WHERE status = 'merged' AND merged_at >= ?
               ORDER BY merged_at DESC""",
            (cutoff,),
        ).fetchall()
        prs_merged = len(merged_prs)

        # PRs opened in period
        prs_opened = conn.execute(
            "SELECT COUNT(*) FROM prs WHERE created_at >= ?", (cutoff,)
        ).fetchone()[0]

        # Rejected PRs in period. json_valid() keeps one malformed audit row
        # from making json_extract() fail the entire digest; it mirrors the
        # guard already used by the rejection-reasons query below.
        prs_rejected = conn.execute(
            """SELECT COUNT(DISTINCT json_extract(detail, '$.pr'))
               FROM audit_log
               WHERE stage = 'evaluate'
               AND event IN ('domain_rejected', 'tier05_rejected')
               AND timestamp >= ?
               AND json_valid(detail)""",
            (cutoff,),
        ).fetchone()[0]

        # Approval rate
        total_evaluated = prs_merged + prs_rejected
        approval_rate = round(prs_merged / total_evaluated * 100, 1) if total_evaluated > 0 else 0.0

        # Top rejection reasons
        rejection_rows = conn.execute(
            """SELECT json_extract(detail, '$.issues') as issues
               FROM audit_log
               WHERE stage = 'evaluate'
               AND event IN ('domain_rejected', 'tier05_rejected')
               AND timestamp >= ?
               AND json_valid(detail)""",
            (cutoff,),
        ).fetchall()

        reason_counts: dict[str, int] = {}
        for row in rejection_rows:
            if not row["issues"]:
                continue
            try:
                issues = json.loads(row["issues"])
                if isinstance(issues, list):
                    for issue in issues:
                        reason_counts[issue] = reason_counts.get(issue, 0) + 1
            except (json.JSONDecodeError, TypeError):
                # Unparseable or unhashable entries are skipped, not fatal.
                pass

        ranked = sorted(reason_counts.items(), key=lambda x: -x[1])[:5]
        top_rejection_reasons = [{"reason": r, "count": c} for r, c in ranked]

        # Agent activity — who contributed what
        agent_rows = conn.execute(
            """SELECT agent,
                      COUNT(*) as total,
                      SUM(CASE WHEN status = 'merged' THEN 1 ELSE 0 END) as merged,
                      SUM(CASE WHEN commit_type = 'extract' OR commit_type = 'research' THEN 1 ELSE 0 END) as extractions,
                      SUM(CASE WHEN commit_type = 'challenge' THEN 1 ELSE 0 END) as challenges,
                      SUM(CASE WHEN commit_type = 'enrich' OR commit_type = 'reweave' THEN 1 ELSE 0 END) as enrichments,
                      SUM(CASE WHEN commit_type = 'synthesize' THEN 1 ELSE 0 END) as syntheses
               FROM prs
               WHERE created_at >= ? AND agent IS NOT NULL AND agent != ''
               GROUP BY agent
               ORDER BY merged DESC""",
            (cutoff,),
        ).fetchall()

        agent_activity = [
            {
                "agent": row["agent"],
                "prs_total": row["total"],
                "prs_merged": row["merged"],
                "extractions": row["extractions"],
                "challenges": row["challenges"],
                "enrichments": row["enrichments"],
                "syntheses": row["syntheses"],
            }
            for row in agent_rows
        ]

        return {
            "merged_prs": [dict(pr) for pr in merged_prs],
            "prs_merged": prs_merged,
            "prs_opened": prs_opened,
            "prs_rejected": prs_rejected,
            "approval_rate": approval_rate,
            "top_rejection_reasons": top_rejection_reasons,
            "agent_activity": agent_activity,
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def _fetch_claim_index(session: aiohttp.ClientSession, timeout_s: int) -> dict:
    """Fetch claim-index summary stats.

    Returns the four summary fields (with defaults for missing keys) on a
    200 response, and ``{}`` on any other status or on error (logged).
    """
    defaults = {
        "total_claims": 0,
        "domains": {},
        "orphan_ratio": 0,
        "cross_domain_links": 0,
    }
    try:
        request_timeout = aiohttp.ClientTimeout(total=timeout_s)
        async with session.get(CLAIM_INDEX_URL, timeout=request_timeout) as resp:
            if resp.status != 200:
                return {}
            payload = await resp.json()
            return {key: payload.get(key, fallback) for key, fallback in defaults.items()}
    except Exception as e:
        logger.warning("Failed to fetch claim-index: %s", e)
        return {}
|
||||
|
||||
|
||||
async def _fetch_merged_pr_details(
    session: aiohttp.ClientSession,
    pr_numbers: list[int],
    timeout_s: int,
) -> dict[int, dict]:
    """Fetch PR details from Forgejo for merged PRs (parallel).

    Returns ``{pr_number: payload}``; a PR whose request fails or does not
    answer 200 maps to ``{}``.
    """
    if not pr_numbers:
        return {}

    async def _fetch_one(n: int) -> tuple[int, dict]:
        payload: dict = {}
        try:
            async with session.get(
                f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}",
                timeout=aiohttp.ClientTimeout(total=timeout_s),
            ) as resp:
                if resp.status == 200:
                    payload = await resp.json()
        except Exception as e:
            logger.warning("Failed to fetch PR #%d: %s", n, e)
        return n, payload

    pairs = await asyncio.gather(*map(_fetch_one, pr_numbers))
    return dict(pairs)
|
||||
|
||||
|
||||
async def _fetch_open_pr_count(session: aiohttp.ClientSession, timeout_s: int) -> int:
    """Get count of open PRs from Forgejo.

    Reads the ``X-Total-Count`` response header; returns 0 on a non-200
    status or on any error (logged).
    """
    endpoint = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=1"
    try:
        async with session.get(endpoint, timeout=aiohttp.ClientTimeout(total=timeout_s)) as resp:
            if resp.status == 200:
                header_total = resp.headers.get("X-Total-Count")
                if header_total is None:
                    # Fallback: count the returned page.
                    # NOTE(review): the request uses limit=1, so this is at
                    # most 1 and not the real number of open PRs.
                    return len(await resp.json())
                return int(header_total)
    except Exception as e:
        logger.warning("Failed to fetch open PR count: %s", e)
    return 0
|
||||
|
||||
|
||||
def _build_merged_claims(
    merged_prs: list[dict],
    pr_details: dict[int, dict],
) -> list[dict]:
    """Build claim summaries from merged PRs + Forgejo PR bodies.

    Args:
        merged_prs: PR rows as dicts (``number``, ``branch``, ``agent``,
            ``domain``, ``commit_type``, ``merged_at``, ``cost_usd``).
        pr_details: Forgejo PR payloads keyed by PR number; may be missing
            or empty for a PR.

    Returns:
        One summary dict per PR, in input order. Fallbacks use ``or`` so they
        also apply when a key is present but holds ``None`` (a NULL column),
        which ``dict.get(key, default)`` alone would pass through.
    """
    claims = []
    for pr in merged_prs:
        number = pr["number"]
        detail = pr_details.get(number) or {}

        # Extract summary from PR body (first paragraph or first 200 chars)
        summary = _extract_summary(detail.get("body") or "")

        claims.append({
            "pr_number": number,
            "title": detail.get("title") or pr.get("branch") or f"PR #{number}",
            "agent": pr.get("agent") or "unknown",
            "domain": pr.get("domain") or "unknown",
            "commit_type": pr.get("commit_type") or "knowledge",
            "summary": summary,
            "merged_at": pr.get("merged_at") or "",
            "cost_usd": pr.get("cost_usd") or 0.0,
            "url": detail.get("html_url") or "",
        })

    return claims
|
||||
|
||||
|
||||
def _extract_summary(body: str) -> str:
    """Extract a short summary (at most 300 characters) from PR body markdown.

    Prefers up to three non-empty, non-checklist lines from a
    ``## Summary`` section, joined by spaces. Without such content, falls
    back to the first line that is neither a header nor a checklist item.
    Returns ``""`` when nothing qualifies.
    """
    if not body:
        return ""

    rows = body.strip().split("\n")

    collected = []
    inside = False
    for raw in rows:
        text = raw.strip()
        if text.lower().startswith("## summary"):
            inside = True
        elif inside:
            if raw.startswith("##"):
                break  # next section begins
            if text and not text.startswith("- ["):  # skip checklists
                collected.append(text)
                if len(collected) >= 3:
                    break

    if collected:
        return " ".join(collected)[:300]

    # Fallback: first non-header, non-empty paragraph
    candidates = (
        text[:300]
        for text in map(str.strip, rows)
        if text and not text.startswith("#") and not text.startswith("- [")
    )
    return next(candidates, "")
|
||||
62
diagnostics/daily_digest_routes.py
Normal file
62
diagnostics/daily_digest_routes.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
"""Route handlers for /api/daily-digest endpoint.
|
||||
|
||||
Import into app.py and register routes in create_app().
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from aiohttp import web
|
||||
from daily_digest import fetch_daily_digest
|
||||
|
||||
logger = logging.getLogger("argus.daily_digest")
|
||||
|
||||
|
||||
async def handle_daily_digest(request):
    """GET /api/daily-digest — structured data for Telegram daily digest.

    Query params:
        hours: lookback period in hours (default: 24, max: 168)

    Returns JSON with:
        claims_merged: merged claims with summaries
        pipeline_stats: PRs merged/opened/rejected, approval rate, rejection reasons
        agent_activity: per-agent contribution breakdown
        pending_review: open PR count
        knowledge_base: total claims, domain breakdown, orphan ratio

    Responds 500 with an ``error`` message when the database path is not
    configured on the app or building the digest fails.
    """
    # Validate hours param
    try:
        hours = int(request.query.get("hours", 24))
        hours = max(1, min(hours, 168))  # clamp to 1h-7d
    except (ValueError, TypeError):
        hours = 24

    db_path = request.app.get("_db_path")
    if not db_path:
        return web.json_response({"error": "database not configured"}, status=500)

    token = request.app.get("_forgejo_token")

    try:
        digest = await fetch_daily_digest(
            db_path=db_path,
            forgejo_token=token,
            hours=hours,
        )
    except Exception as e:
        # logger.exception keeps the traceback, which a bare error message
        # loses; the client-facing payload is unchanged.
        logger.exception("Daily digest fetch failed: %s", e)
        return web.json_response({"error": str(e)}, status=500)

    return web.json_response(digest)
|
||||
|
||||
|
||||
def register_daily_digest_routes(app, db_path: str, forgejo_token: str | None = None):
    """Register daily digest routes on the app.

    Stores the settings the handler reads back from the app mapping, then
    adds the GET route.

    db_path: path to pipeline.db
    forgejo_token: optional Forgejo API token (stored only when truthy)
    """
    settings = {"_db_path": db_path}
    if forgejo_token:
        settings["_forgejo_token"] = forgejo_token
    for key, value in settings.items():
        app[key] = value
    app.router.add_get("/api/daily-digest", handle_daily_digest)
|
||||
1424
diagnostics/dashboard-v2.html
Normal file
1424
diagnostics/dashboard-v2.html
Normal file
File diff suppressed because one or more lines are too long
348
diagnostics/dashboard_agents.py
Normal file
348
diagnostics/dashboard_agents.py
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
"""Page 3: Agent Performance — "Who's contributing what?"
|
||||
|
||||
Slim version v2 per Cory feedback (2026-04-03):
|
||||
- Hero: total merged, rejection rate, claims/week — 3 numbers
|
||||
- Table: agent, merged, rejection rate, last active, inbox depth — 5 columns
|
||||
- One chart: weekly contributions by agent (stacked bar)
|
||||
- No CI scores, no yield (redundant with rejection rate), no top issue (too granular)
|
||||
|
||||
Fetches /api/agents-dashboard + /api/agent-state, merges client-side.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from shared_ui import render_page
|
||||
|
||||
|
||||
def render_agents_page(contributors_principal: list, contributors_agent: list, now: datetime) -> str:
|
||||
"""Render the slim Agent Performance page."""
|
||||
|
||||
body = """
|
||||
<!-- Hero Metrics (filled by JS) -->
|
||||
<div class="grid" id="hero-metrics">
|
||||
<div class="card" style="text-align:center;color:#8b949e">Loading...</div>
|
||||
</div>
|
||||
|
||||
<!-- Per-Agent Table -->
|
||||
<div class="section">
|
||||
<div class="section-title">Agent Breakdown (30d)</div>
|
||||
<div class="card">
|
||||
<table id="agent-table">
|
||||
<tr>
|
||||
<th>Agent</th>
|
||||
<th style="text-align:right">Merged</th>
|
||||
<th style="text-align:right">Rejection Rate</th>
|
||||
<th style="text-align:right">Last Active</th>
|
||||
<th style="text-align:right">Inbox</th>
|
||||
</tr>
|
||||
<tr><td colspan="5" style="color:#8b949e;text-align:center">Loading...</td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Weekly Contributions Chart -->
|
||||
<div class="section">
|
||||
<div class="chart-container" style="max-width:100%">
|
||||
<h2>Claims Merged per Week by Agent</h2>
|
||||
<canvas id="trendChart"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Agent Scorecard (from review_records) -->
|
||||
<div class="section">
|
||||
<div class="section-title">Agent Scorecard (Structured Reviews)</div>
|
||||
<div class="card">
|
||||
<table id="scorecard-table">
|
||||
<tr><td colspan="7" style="color:#8b949e;text-align:center">Loading...</td></tr>
|
||||
</table>
|
||||
<div id="scorecard-rejections" style="margin-top:12px"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Latest Session Digests -->
|
||||
<div class="section">
|
||||
<div class="section-title">Latest Session Digests</div>
|
||||
<div id="digest-container">
|
||||
<div class="card" style="text-align:center;color:#8b949e">Loading...</div>
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
|
||||
scripts = """<script>
|
||||
Promise.all([
|
||||
fetch('/api/agents-dashboard?days=30').then(r => r.json()),
|
||||
fetch('/api/agent-state').then(r => r.json()).catch(() => ({agents: {}}))
|
||||
])
|
||||
.then(([data, stateData]) => {
|
||||
const agents = data.agents || {};
|
||||
const agentState = stateData.agents || {};
|
||||
|
||||
// Sort by approved desc, filter to agents with evals
|
||||
const sorted = Object.entries(agents)
|
||||
.filter(([_, a]) => a.evaluated > 0)
|
||||
.sort((a, b) => (b[1].approved || 0) - (a[1].approved || 0));
|
||||
|
||||
// --- Hero metrics ---
|
||||
let totalMerged = 0, totalRejected = 0, totalEval = 0;
|
||||
const weekMerged = {};
|
||||
for (const [_, a] of sorted) {
|
||||
totalMerged += a.approved || 0;
|
||||
totalRejected += a.rejected || 0;
|
||||
totalEval += a.evaluated || 0;
|
||||
if (a.weekly_trend) {
|
||||
a.weekly_trend.forEach(w => {
|
||||
weekMerged[w.week] = (weekMerged[w.week] || 0) + (w.merged || 0);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const weeks = Object.keys(weekMerged).sort();
|
||||
const recentWeeks = weeks.slice(-4);
|
||||
const claimsPerWeek = recentWeeks.length > 0
|
||||
? Math.round(recentWeeks.reduce((s, w) => s + weekMerged[w], 0) / recentWeeks.length)
|
||||
: 0;
|
||||
const rejRate = totalEval > 0 ? ((totalRejected / totalEval) * 100).toFixed(1) : '0';
|
||||
|
||||
document.getElementById('hero-metrics').innerHTML =
|
||||
'<div class="card" style="text-align:center">' +
|
||||
'<div class="label">Claims Merged (30d)</div>' +
|
||||
'<div style="font-size:32px;font-weight:700;color:#3fb950">' + totalMerged + '</div>' +
|
||||
'</div>' +
|
||||
'<div class="card" style="text-align:center">' +
|
||||
'<div class="label">Rejection Rate</div>' +
|
||||
'<div style="font-size:32px;font-weight:700;color:' + (parseFloat(rejRate) > 30 ? '#f85149' : '#e3b341') + '">' + rejRate + '%</div>' +
|
||||
'</div>' +
|
||||
'<div class="card" style="text-align:center">' +
|
||||
'<div class="label">Claims/Week (avg last 4w)</div>' +
|
||||
'<div style="font-size:32px;font-weight:700;color:#58a6ff">' + claimsPerWeek + '</div>' +
|
||||
'</div>';
|
||||
|
||||
// --- Per-agent table ---
|
||||
if (sorted.length === 0) {
|
||||
document.getElementById('agent-table').innerHTML =
|
||||
'<tr><th>Agent</th><th>Merged</th><th>Rejection Rate</th><th>Last Active</th><th>Inbox</th></tr>' +
|
||||
'<tr><td colspan="5" style="color:#8b949e;text-align:center">No evaluation data yet</td></tr>';
|
||||
return;
|
||||
}
|
||||
|
||||
// Helper: format relative time
|
||||
function timeAgo(isoStr) {
|
||||
if (!isoStr) return '<span style="color:#484f58">unknown</span>';
|
||||
const diff = (Date.now() - new Date(isoStr).getTime()) / 1000;
|
||||
if (diff < 3600) return Math.round(diff / 60) + 'm ago';
|
||||
if (diff < 86400) return Math.round(diff / 3600) + 'h ago';
|
||||
return Math.round(diff / 86400) + 'd ago';
|
||||
}
|
||||
|
||||
let tableHtml = '<tr><th>Agent</th><th style="text-align:right">Merged</th>' +
|
||||
'<th style="text-align:right">Rejection Rate</th>' +
|
||||
'<th style="text-align:right">Last Active</th>' +
|
||||
'<th style="text-align:right">Inbox</th></tr>';
|
||||
|
||||
for (const [name, a] of sorted) {
|
||||
const color = agentColor(name);
|
||||
const rr = a.evaluated > 0 ? ((a.rejected / a.evaluated) * 100).toFixed(1) + '%' : '-';
|
||||
const rrColor = a.rejection_rate > 0.3 ? '#f85149' : a.rejection_rate > 0.15 ? '#e3b341' : '#3fb950';
|
||||
|
||||
// Agent state lookup (case-insensitive match)
|
||||
const stateKey = Object.keys(agentState).find(k => k.toLowerCase() === name.toLowerCase()) || '';
|
||||
const state = agentState[stateKey] || {};
|
||||
const lastActive = timeAgo(state.last_active);
|
||||
const inboxDepth = state.inbox_depth != null ? state.inbox_depth : '-';
|
||||
const inboxColor = inboxDepth > 10 ? '#f85149' : inboxDepth > 5 ? '#d29922' : inboxDepth > 0 ? '#58a6ff' : '#3fb950';
|
||||
|
||||
tableHtml += '<tr>' +
|
||||
'<td><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:' + color + ';margin-right:6px"></span>' + esc(name) + '</td>' +
|
||||
'<td style="text-align:right;font-weight:600;color:#3fb950">' + (a.approved || 0) + '</td>' +
|
||||
'<td style="text-align:right;color:' + rrColor + '">' + rr + '</td>' +
|
||||
'<td style="text-align:right">' + lastActive + '</td>' +
|
||||
'<td style="text-align:right;color:' + inboxColor + '">' + inboxDepth + '</td>' +
|
||||
'</tr>';
|
||||
}
|
||||
|
||||
document.getElementById('agent-table').innerHTML = tableHtml;
|
||||
|
||||
// --- Weekly trend chart ---
|
||||
const allWeeks = new Set();
|
||||
const agentNames = [];
|
||||
for (const [name, a] of sorted) {
|
||||
if (a.weekly_trend && a.weekly_trend.length > 0) {
|
||||
agentNames.push(name);
|
||||
a.weekly_trend.forEach(w => allWeeks.add(w.week));
|
||||
}
|
||||
}
|
||||
const sortedWeeks = [...allWeeks].sort();
|
||||
|
||||
if (sortedWeeks.length > 0 && agentNames.length > 0) {
|
||||
const trendMap = {};
|
||||
for (const [name, a] of sorted) {
|
||||
if (a.weekly_trend) {
|
||||
trendMap[name] = {};
|
||||
a.weekly_trend.forEach(w => { trendMap[name][w.week] = w.merged; });
|
||||
}
|
||||
}
|
||||
|
||||
new Chart(document.getElementById('trendChart'), {
|
||||
type: 'bar',
|
||||
data: {
|
||||
labels: sortedWeeks,
|
||||
datasets: agentNames.map(name => ({
|
||||
label: name,
|
||||
data: sortedWeeks.map(w => (trendMap[name] || {})[w] || 0),
|
||||
backgroundColor: agentColor(name),
|
||||
})),
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
scales: {
|
||||
x: { stacked: true, grid: { display: false } },
|
||||
y: { stacked: true, title: { display: true, text: 'Claims Merged' }, min: 0 },
|
||||
},
|
||||
plugins: { legend: { labels: { boxWidth: 12 } } },
|
||||
},
|
||||
});
|
||||
}
|
||||
}).catch(err => {
|
||||
document.getElementById('hero-metrics').innerHTML =
|
||||
'<div class="card" style="grid-column:1/-1;text-align:center;color:#f85149">Failed to load: ' + err.message + '</div>';
|
||||
});
|
||||
|
||||
// --- Agent Scorecard ---
|
||||
fetch('/api/agent-scorecard')
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
const cards = data.scorecards || [];
|
||||
if (cards.length === 0 || cards.every(c => c.total_reviews === 0)) {
|
||||
document.getElementById('scorecard-table').innerHTML =
|
||||
'<tr><td colspan="7" style="color:#8b949e;text-align:center">No structured review data yet (review_records table is empty)</td></tr>';
|
||||
return;
|
||||
}
|
||||
|
||||
let html = '<tr><th>Agent</th><th style="text-align:right">PRs</th><th style="text-align:right">Reviews</th>' +
|
||||
'<th style="text-align:right">Approved</th><th style="text-align:right">w/ Changes</th>' +
|
||||
'<th style="text-align:right">Rejected</th><th style="text-align:right">Approval Rate</th></tr>';
|
||||
|
||||
const allReasons = {};
|
||||
for (const c of cards) {
|
||||
const arColor = c.approval_rate >= 80 ? '#3fb950' : c.approval_rate >= 60 ? '#d29922' : '#f85149';
|
||||
html += '<tr>' +
|
||||
'<td><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:' + agentColor(c.agent) + ';margin-right:6px"></span>' + esc(c.agent) + '</td>' +
|
||||
'<td style="text-align:right">' + c.total_prs + '</td>' +
|
||||
'<td style="text-align:right">' + c.total_reviews + '</td>' +
|
||||
'<td style="text-align:right;color:#3fb950">' + c.approved + '</td>' +
|
||||
'<td style="text-align:right;color:#d29922">' + c.approved_with_changes + '</td>' +
|
||||
'<td style="text-align:right;color:#f85149">' + c.rejected + '</td>' +
|
||||
'<td style="text-align:right;font-weight:600;color:' + arColor + '">' + c.approval_rate.toFixed(1) + '%</td>' +
|
||||
'</tr>';
|
||||
if (c.rejection_reasons) {
|
||||
for (const [reason, cnt] of Object.entries(c.rejection_reasons)) {
|
||||
allReasons[reason] = (allReasons[reason] || 0) + cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
document.getElementById('scorecard-table').innerHTML = html;
|
||||
|
||||
// Top rejection reasons across all agents
|
||||
const sortedReasons = Object.entries(allReasons).sort((a, b) => b[1] - a[1]);
|
||||
if (sortedReasons.length > 0) {
|
||||
let rHtml = '<div style="font-size:12px;font-weight:600;color:#8b949e;margin-bottom:6px;text-transform:uppercase">Top Rejection Reasons</div>';
|
||||
rHtml += sortedReasons.map(([reason, cnt]) =>
|
||||
'<span style="display:inline-block;margin:2px 4px;padding:3px 10px;background:#f8514922;border:1px solid #f8514944;border-radius:12px;font-size:12px;color:#f85149">' +
|
||||
esc(reason) + ' <strong>' + cnt + '</strong></span>'
|
||||
).join('');
|
||||
rHtml += '<div style="margin-top:8px;font-size:11px;color:#484f58">Target: 80% approval rate. Too high = too conservative, too low = wasting pipeline compute.</div>';
|
||||
document.getElementById('scorecard-rejections').innerHTML = rHtml;
|
||||
}
|
||||
}).catch(() => {
|
||||
document.getElementById('scorecard-table').innerHTML =
|
||||
'<tr><td colspan="7" style="color:#8b949e;text-align:center">Failed to load scorecard</td></tr>';
|
||||
});
|
||||
|
||||
// --- Latest Session Digests ---
|
||||
fetch('/api/session-digest?latest=true')
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
const digests = data.digests || [];
|
||||
if (digests.length === 0) {
|
||||
document.getElementById('digest-container').innerHTML =
|
||||
'<div class="card" style="text-align:center;color:#8b949e">No session digests yet. Data starts flowing when agents complete research sessions.</div>';
|
||||
return;
|
||||
}
|
||||
|
||||
let html = '<div class="grid" style="grid-template-columns:repeat(auto-fit, minmax(320px, 1fr))">';
|
||||
for (const d of digests) {
|
||||
const color = agentColor(d.agent);
|
||||
const dateStr = d.date || d.timestamp || '';
|
||||
|
||||
html += '<div class="card" style="border-left:3px solid ' + color + '">' +
|
||||
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px">' +
|
||||
'<strong style="color:' + color + '">' + esc(d.agent || 'unknown') + '</strong>' +
|
||||
'<span style="font-size:11px;color:#484f58">' + esc(dateStr) + '</span>' +
|
||||
'</div>';
|
||||
|
||||
if (d.research_question) {
|
||||
html += '<div style="font-size:13px;font-style:italic;color:#c9d1d9;margin-bottom:8px">' + esc(d.research_question) + '</div>';
|
||||
}
|
||||
|
||||
if (d.key_findings && d.key_findings.length > 0) {
|
||||
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Key Findings</div><ul style="margin:0 0 8px 16px;font-size:12px">';
|
||||
for (const f of d.key_findings) html += '<li>' + esc(f) + '</li>';
|
||||
html += '</ul>';
|
||||
}
|
||||
|
||||
if (d.surprises && d.surprises.length > 0) {
|
||||
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Surprises</div><ul style="margin:0 0 8px 16px;font-size:12px">';
|
||||
for (const s of d.surprises) html += '<li>' + esc(s) + '</li>';
|
||||
html += '</ul>';
|
||||
}
|
||||
|
||||
if (d.confidence_shifts && d.confidence_shifts.length > 0) {
|
||||
html += '<div style="font-size:11px;color:#8b949e;text-transform:uppercase;margin-bottom:4px">Confidence Shifts</div>';
|
||||
for (const cs of d.confidence_shifts) {
|
||||
const arrow = cs.direction === 'up' ? '▲' : cs.direction === 'down' ? '▼' : '▶';
|
||||
const arrowColor = cs.direction === 'up' ? '#3fb950' : cs.direction === 'down' ? '#f85149' : '#d29922';
|
||||
html += '<div style="font-size:12px;margin-left:16px"><span style="color:' + arrowColor + '">' + arrow + '</span> ' + esc(cs.claim || cs.topic || '') + '</div>';
|
||||
}
|
||||
}
|
||||
|
||||
// Expandable details
|
||||
const detailId = 'digest-detail-' + Math.random().toString(36).substr(2, 6);
|
||||
const hasDetails = (d.sources_archived && d.sources_archived.length > 0) ||
|
||||
(d.prs_submitted && d.prs_submitted.length > 0) ||
|
||||
(d.follow_ups && d.follow_ups.length > 0);
|
||||
if (hasDetails) {
|
||||
html += '<a style="color:#58a6ff;cursor:pointer;font-size:11px;display:block;margin-top:6px" ' +
|
||||
'onclick="var e=document.getElementById(\\x27' + detailId + '\\x27);e.style.display=e.style.display===\\x27none\\x27?\\x27block\\x27:\\x27none\\x27">Details</a>';
|
||||
html += '<div id="' + detailId + '" style="display:none;margin-top:6px;font-size:12px">';
|
||||
if (d.sources_archived && d.sources_archived.length > 0) {
|
||||
html += '<div style="color:#8b949e;font-size:11px">Sources: ' + d.sources_archived.length + '</div>';
|
||||
}
|
||||
if (d.prs_submitted && d.prs_submitted.length > 0) {
|
||||
html += '<div style="color:#8b949e;font-size:11px">PRs: ' + d.prs_submitted.map(p => '#' + p).join(', ') + '</div>';
|
||||
}
|
||||
if (d.follow_ups && d.follow_ups.length > 0) {
|
||||
html += '<div style="color:#8b949e;font-size:11px;margin-top:4px">Follow-ups:</div><ul style="margin:2px 0 0 16px">';
|
||||
for (const fu of d.follow_ups) html += '<li>' + esc(fu) + '</li>';
|
||||
html += '</ul>';
|
||||
}
|
||||
html += '</div>';
|
||||
}
|
||||
|
||||
html += '</div>';
|
||||
}
|
||||
html += '</div>';
|
||||
document.getElementById('digest-container').innerHTML = html;
|
||||
}).catch(() => {
|
||||
document.getElementById('digest-container').innerHTML =
|
||||
'<div class="card" style="text-align:center;color:#8b949e">Failed to load session digests</div>';
|
||||
});
|
||||
</script>"""
|
||||
|
||||
return render_page(
|
||||
title="Agent Performance",
|
||||
subtitle="Who's contributing what?",
|
||||
active_path="/agents",
|
||||
body_html=body,
|
||||
scripts=scripts,
|
||||
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||
)
|
||||
226
diagnostics/dashboard_epistemic.py
Normal file
226
diagnostics/dashboard_epistemic.py
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
"""Page 4: Epistemic Integrity — "Can we trust what we know?"
|
||||
|
||||
Live sections:
|
||||
- Confidence calibration (from claim-index via vital signs)
|
||||
- Cascade coverage (from audit_log stage='cascade')
|
||||
- Review quality (from review_records table)
|
||||
|
||||
Placeholder sections:
|
||||
- Multi-model agreement (needs model_evals table)
|
||||
- Belief staleness (needs cascade tracking to give it meaning)
|
||||
- Divergence tracking (needs divergence events)
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from shared_ui import render_page
|
||||
|
||||
|
||||
def render_epistemic_page(vital_signs: dict, now: datetime) -> str:
    """Render the Epistemic Integrity page ("Can we trust what we know?").

    The server-rendered part is the confidence calibration table and the
    data for the confidence doughnut chart. Cascade coverage and review
    quality are filled in client-side by the inline script, which fetches
    ``/api/cascade-coverage?days=30`` and ``/api/review-summary?days=30``.
    Multi-model agreement and belief staleness are static placeholders.

    Args:
        vital_signs: Vital-signs payload. Only ``confidence_distribution``
            (a mapping of confidence level -> claim count) is read; a
            missing or ``None`` value is treated as an empty distribution.
        now: Timestamp shown on the page. It is formatted with a literal
            "UTC" suffix, so it is assumed to already be in UTC
            (NOTE(review): confirm with callers).

    Returns:
        The complete page HTML as produced by ``render_page``.
    """
    # `or {}` (rather than a .get default) also covers an explicit None value.
    vs_conf = vital_signs.get("confidence_distribution") or {}
    total_claims = sum(vs_conf.values())

    # Confidence calibration table: one row per known level, in fixed order.
    row_parts = []
    for level in ("proven", "likely", "experimental", "speculative"):
        count = vs_conf.get(level, 0)
        pct = round(count / total_claims * 100, 1) if total_claims else 0
        row_parts.append(f"<tr><td>{level}</td><td>{count}</td><td>{pct}%</td></tr>")
    conf_rows = "".join(row_parts)

    # The JSON is embedded in an inline <script>. Escaping "</" keeps a key such
    # as "</script>" from terminating the script element; "<\/" is a valid
    # JSON/JavaScript string escape, so the parsed data is unchanged.
    conf_json = json.dumps(vs_conf).replace("</", "<\\/")

    body = f"""
    <!-- Confidence Calibration (LIVE) -->
    <div class="section">
        <div class="section-title">Confidence Calibration</div>
        <div class="row">
            <div class="card">
                <table>
                    <tr><th>Level</th><th>Claims</th><th>Share</th></tr>
                    {conf_rows}
                </table>
                <div style="margin-top:12px;font-size:12px;color:#8b949e">
                    Total claims: {total_claims}
                </div>
            </div>
            <div class="chart-container">
                <h2>Confidence Distribution</h2>
                <canvas id="confPieChart"></canvas>
            </div>
        </div>
    </div>

    <!-- Cascade Coverage (LIVE — from audit_log) -->
    <div class="section">
        <div class="section-title">Cascade Coverage</div>
        <div id="cascade-container">
            <div class="card" style="text-align:center;color:#8b949e">Loading cascade data...</div>
        </div>
    </div>

    <!-- Review Quality (LIVE — from review_records table) -->
    <div class="section">
        <div class="section-title">Review Quality</div>
        <div id="review-container">
            <div class="card" style="text-align:center;color:#8b949e">Loading review data...</div>
        </div>
    </div>

    <!-- Multi-Model Agreement — Placeholder -->
    <div class="section">
        <div class="section-title">Multi-Model Agreement</div>
        <div class="card" style="text-align:center;padding:40px">
            <div style="font-size:40px;margin-bottom:12px;opacity:0.3">⚙</div>
            <div style="color:#8b949e">
                Multi-model agreement rate requires the <code>model_evals</code> table.<br>
                <span style="font-size:12px">Blocked on: model_evals table creation (Ship Phase 3)</span>
            </div>
            <div style="margin-top:16px;font-size:12px;color:#8b949e">
                Current eval models: Haiku (triage), GPT-4o (domain), Sonnet/Opus (Leo).<br>
                Agreement tracking needs per-model verdicts stored separately.
            </div>
        </div>
    </div>

    <!-- Belief Staleness — Placeholder -->
    <div class="section">
        <div class="section-title">Belief Staleness</div>
        <div class="card" style="text-align:center;padding:40px">
            <div style="font-size:40px;margin-bottom:12px;opacity:0.3">⏲</div>
            <div style="color:#8b949e">
                Belief staleness scan will compare belief file <code>depends_on</code> frontmatter<br>
                against claim <code>merged_at</code> timestamps.<br>
                <span style="font-size:12px">Ready to implement once cascade tracking accumulates data</span>
            </div>
        </div>
    </div>
    """

    # NOTE(review): the script calls esc() and Chart, neither of which is
    # defined in this function; presumably the shared page template provides
    # them. Confirm in shared_ui.
    scripts = f"""<script>
// Confidence pie chart
const confData = {conf_json};
const confLabels = Object.keys(confData);
const confValues = Object.values(confData);
if (confLabels.length > 0) {{
    const confColors = {{ 'proven': '#3fb950', 'likely': '#58a6ff', 'experimental': '#d29922', 'speculative': '#f85149', 'unknown': '#8b949e' }};
    new Chart(document.getElementById('confPieChart'), {{
        type: 'doughnut',
        data: {{
            labels: confLabels,
            datasets: [{{
                data: confValues,
                backgroundColor: confLabels.map(l => confColors[l] || '#8b949e'),
                borderColor: '#161b22',
                borderWidth: 2,
            }}],
        }},
        options: {{
            responsive: true,
            plugins: {{
                legend: {{ position: 'right', labels: {{ boxWidth: 12 }} }},
            }},
        }},
    }});
}}

// --- Cascade Coverage (live) ---
fetch('/api/cascade-coverage?days=30')
    .then(r => r.json())
    .then(data => {{
        const el = document.getElementById('cascade-container');
        if (data.total_triggered === 0) {{
            el.innerHTML = `
                <div class="card" style="text-align:center;padding:30px">
                    <div style="font-size:14px;color:#d29922">No cascade events recorded yet</div>
                    <div style="font-size:12px;color:#8b949e;margin-top:8px">
                        Cascade instrumentation is deployed. Events will appear as new PRs flow through eval and trigger belief/position reviews.
                    </div>
                </div>`;
            return;
        }}

        const compRate = data.completion_rate != null ? (data.completion_rate * 100).toFixed(1) + '%' : '--';
        const compColor = data.completion_rate >= 0.7 ? '#3fb950' : data.completion_rate >= 0.4 ? '#d29922' : '#f85149';

        let agentRows = '';
        for (const a of (data.by_agent || [])) {{
            agentRows += '<tr><td>' + esc(a.agent) + '</td><td>' + a.triggered + '</td><td>' + a.claims_affected + '</td></tr>';
        }}

        el.innerHTML = `
            <div class="grid">
                <div class="card"><div class="label">Cascades Triggered</div><div class="hero-value">${{data.total_triggered}}</div></div>
                <div class="card"><div class="label">Cascades Reviewed</div><div class="hero-value">${{data.total_reviewed}}</div></div>
                <div class="card"><div class="label">Completion Rate</div><div class="hero-value" style="color:${{compColor}}">${{compRate}}</div></div>
                <div class="card"><div class="label">Merges w/ Cascade</div><div class="hero-value">${{data.merges_with_cascade}}</div></div>
            </div>
            <div class="card" style="margin-top:12px">
                <table>
                    <tr><th>Agent</th><th>Cascades Triggered</th><th>Claims Affected</th></tr>
                    ${{agentRows || '<tr><td colspan="3" style="color:#8b949e">No per-agent data</td></tr>'}}
                </table>
            </div>`;
    }}).catch(() => {{
        document.getElementById('cascade-container').innerHTML =
            '<div class="card" style="color:#f85149">Failed to load cascade data</div>';
    }});

// --- Review Quality (live from review_records) ---
fetch('/api/review-summary?days=30')
    .then(r => r.json())
    .then(data => {{
        const el = document.getElementById('review-container');
        if (!data.populated) {{
            el.innerHTML = `
                <div class="card" style="text-align:center;padding:30px">
                    <div style="font-size:14px;color:#d29922">Review records table is empty</div>
                    <div style="font-size:12px;color:#8b949e;margin-top:8px">
                        review_records (migration v12) is deployed. Structured review data will populate as new PRs are evaluated.
                    </div>
                </div>`;
            return;
        }}

        const outcomes = data.outcomes || {{}};
        const approved = (outcomes['approved'] || 0) + (outcomes['approved-with-changes'] || 0);
        const rejected = outcomes['rejected'] || 0;
        const approvalRate = data.total > 0 ? ((approved / data.total) * 100).toFixed(1) : '--';
        const approvalColor = approved / data.total >= 0.7 ? '#3fb950' : approved / data.total >= 0.5 ? '#d29922' : '#f85149';

        // Rejection reasons
        let reasonRows = '';
        for (const r of (data.rejection_reasons || [])) {{
            reasonRows += '<tr><td><code>' + esc(r.reason) + '</code></td><td>' + r.count + '</td></tr>';
        }}

        el.innerHTML = `
            <div class="grid">
                <div class="card"><div class="label">Total Reviews</div><div class="hero-value">${{data.total}}</div></div>
                <div class="card"><div class="label">Approval Rate</div><div class="hero-value" style="color:${{approvalColor}}">${{approvalRate}}%</div></div>
                <div class="card"><div class="label">Approved w/ Changes</div><div class="hero-value" style="color:#d29922">${{outcomes['approved-with-changes'] || 0}}</div></div>
                <div class="card"><div class="label">Rejected</div><div class="hero-value" style="color:#f85149">${{rejected}}</div></div>
            </div>
            <div class="row" style="margin-top:12px">
                <div class="card">
                    <div style="font-weight:600;margin-bottom:8px">Rejection Reasons</div>
                    <table>
                        <tr><th>Reason</th><th>Count</th></tr>
                        ${{reasonRows || '<tr><td colspan="2" style="color:#8b949e">No rejections</td></tr>'}}
                    </table>
                </div>
            </div>`;
    }}).catch(() => {{
        document.getElementById('review-container').innerHTML =
            '<div class="card" style="color:#f85149">Failed to load review data</div>';
    }});
</script>"""

    return render_page(
        title="Epistemic Integrity",
        subtitle="Can we trust what we know?",
        active_path="/epistemic",
        body_html=body,
        scripts=scripts,
        timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
    )
|
||||
223
diagnostics/dashboard_health.py
Normal file
223
diagnostics/dashboard_health.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Page 2: Knowledge Health — "What do we know and how good is it?"
|
||||
|
||||
Renders: claims by domain, Herfindahl index, evidence freshness,
|
||||
orphan ratio, link density, confidence distribution, extraction yield.
|
||||
|
||||
Data sources: /api/vital-signs, /api/herfindahl, /api/extraction-yield-by-domain,
|
||||
/api/domains, claim-index (cached).
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from shared_ui import render_page
|
||||
|
||||
|
||||
def render_health_page(vital_signs: dict, domain_breakdown: dict, now: datetime) -> str:
    """Render the Knowledge Health page ("What do we know and how good is it?").

    Server-rendered: the vital-sign cards (orphan ratio, link density,
    evidence freshness, confidence spread, claim-index status), the
    per-domain contribution table, the stagnation alert, and the data for
    the two charts. The Herfindahl index and extraction yield sections are
    filled in client-side from ``/api/herfindahl?days=30`` and
    ``/api/extraction-yield-by-domain?days=30``.

    Args:
        vital_signs: Vital-signs payload. Keys read: ``orphan_ratio``,
            ``linkage_density``, ``evidence_freshness``,
            ``confidence_distribution``, ``domain_activity`` and
            ``claim_index_status``. Any of them may be missing or ``None``.
        domain_breakdown: Mapping of domain name -> stats dict. Keys read
            per domain: ``knowledge_prs``, ``total_prs`` and
            ``contributors`` (a list of dicts with ``handle`` and ``claims``).
        now: Timestamp shown on the page. It is formatted with a literal
            "UTC" suffix, so it is assumed to already be in UTC
            (NOTE(review): confirm with callers).

    Returns:
        The complete page HTML as produced by ``render_page``.
    """
    # Local import keeps this block self-contained; the module's import
    # block is left untouched.
    from html import escape

    # --- Vital signs data ---
    # `or {}` (rather than a .get default) also covers keys that are present
    # but set to None.
    vs_orphan = vital_signs.get("orphan_ratio") or {}
    orphan_ratio_val = vs_orphan.get("ratio")
    orphan_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(
        vs_orphan.get("status", ""), ""
    )
    orphan_display = f"{orphan_ratio_val:.1%}" if orphan_ratio_val is not None else "—"
    orphan_count = vs_orphan.get("count", "?")
    orphan_total = vs_orphan.get("total", "?")

    vs_linkage = vital_signs.get("linkage_density") or {}
    avg_links = vs_linkage.get("avg_outgoing_links")
    linkage_display = f"{avg_links}" if avg_links is not None else "—"
    cross_domain_ratio = vs_linkage.get("cross_domain_ratio")
    if cross_domain_ratio is None:
        cross_domain_color = ""
        cross_domain_display = "—"
    else:
        if cross_domain_ratio >= 0.15:
            cross_domain_color = "green"
        elif cross_domain_ratio >= 0.05:
            cross_domain_color = "yellow"
        else:
            cross_domain_color = "red"
        cross_domain_display = f"{cross_domain_ratio:.1%}"

    vs_fresh = vital_signs.get("evidence_freshness") or {}
    median_age = vs_fresh.get("median_age_days")
    # A median of 0 days is a real value, so test for None, not truthiness.
    fresh_display = f"{median_age}" if median_age is not None else "—"
    # Guard against None: the ":.0f" format spec below would raise TypeError.
    fresh_pct = vs_fresh.get("fresh_30d_pct") or 0
    fresh_count = vs_fresh.get("fresh_30d_count", "?")

    vs_conf = vital_signs.get("confidence_distribution") or {}
    confidence_spread = " / ".join(
        f"{vs_conf.get(level, 0)}" for level in ("proven", "likely", "experimental", "speculative")
    )

    # Domain activity
    stagnant = (vital_signs.get("domain_activity") or {}).get("stagnant") or []

    claim_status = vital_signs.get("claim_index_status") or "unavailable"
    claim_status_color = "green" if claim_status == "live" else "red"
    claim_status_display = escape(str(claim_status))

    # --- Domain breakdown table (most knowledge PRs first) ---
    ranked_domains = sorted(
        domain_breakdown.items(),
        key=lambda item: item[1].get("knowledge_prs", 0),
        reverse=True,
    )
    row_parts = []
    for domain, stats in ranked_domains:
        knowledge_prs = stats.get("knowledge_prs", 0)
        if not knowledge_prs > 0:
            continue
        total_prs = stats.get("total_prs", 0)
        # Domain names and contributor handles are stored data, so they are
        # escaped before being placed into markup.
        top_contribs = ", ".join(
            f'{escape(str(c["handle"]))} ({escape(str(c["claims"]))})'
            for c in (stats.get("contributors") or [])[:3]
        )
        row_parts.append(f"""<tr>
                    <td style="color:#58a6ff">{escape(str(domain))}</td>
                    <td>{knowledge_prs}</td>
                    <td>{total_prs}</td>
                    <td style="font-size:12px;color:#8b949e">{top_contribs}</td>
                </tr>""")
    domain_rows = "".join(row_parts)
    if not domain_rows:
        domain_rows = "<tr><td colspan='4' style='color:#8b949e'>No domain data</td></tr>"

    # --- Stagnation alert (only rendered when there is something to report) ---
    stagnation_html = ""
    if stagnant:
        stagnant_names = ", ".join(escape(str(name)) for name in stagnant)
        stagnation_html = f"""
    <div class="section">
        <div class="section-title" style="color:#d29922">Stagnation Alerts</div>
        <div class="card">
            <p style="color:#d29922">Domains with no PR activity in 7 days: <strong>{stagnant_names}</strong></p>
        </div>
    </div>
    """

    # --- Chart data ---
    # The JSON is embedded in an inline <script>. Escaping "</" keeps a key such
    # as "</script>" from terminating the script element; "<\/" is a valid
    # JSON/JavaScript string escape, so the parsed data is unchanged.
    domain_counts = {
        domain: stats.get("knowledge_prs", 0)
        for domain, stats in domain_breakdown.items()
        if stats.get("knowledge_prs", 0) > 0
    }
    domain_json = json.dumps(domain_counts).replace("</", "<\\/")
    conf_json = json.dumps(vs_conf).replace("</", "<\\/")

    body = f"""
    <!-- Vital Signs Cards -->
    <div class="grid">
        <div class="card">
            <div class="label">Orphan Ratio</div>
            <div class="value {orphan_color}">{orphan_display}</div>
            <div class="detail">{orphan_count} / {orphan_total} claims · target &lt;15%</div>
        </div>
        <div class="card">
            <div class="label">Avg Links/Claim</div>
            <div class="value">{linkage_display}</div>
            <div class="detail">cross-domain: <span class="{cross_domain_color}">{cross_domain_display}</span> · target 15-30%</div>
        </div>
        <div class="card">
            <div class="label">Evidence Freshness</div>
            <div class="value">{fresh_display}<span style="font-size:14px;color:#8b949e">d median</span></div>
            <div class="detail">{fresh_count} claims &lt;30d old · {fresh_pct:.0f}% fresh</div>
        </div>
        <div class="card">
            <div class="label">Confidence Spread</div>
            <div class="value" style="font-size:16px">{confidence_spread}</div>
            <div class="detail">proven / likely / experimental / speculative</div>
        </div>
        <div class="card">
            <div class="label">Claim Index</div>
            <div class="value {claim_status_color}">{claim_status_display}</div>
            <div class="detail">{orphan_total} claims indexed</div>
        </div>
    </div>

    <!-- Herfindahl + Domain Yield (loaded via JS) -->
    <div class="row">
        <div class="section">
            <div class="section-title">Domain Concentration</div>
            <div id="herfindahl-container" class="card" style="text-align:center;padding:24px">
                <div class="label">Loading...</div>
            </div>
        </div>
        <div class="section">
            <div class="section-title">Extraction Yield by Domain</div>
            <div id="yield-domain-container" class="card">
                <div style="color:#8b949e;text-align:center;padding:16px">Loading...</div>
            </div>
        </div>
    </div>

    <!-- Charts -->
    <div class="row">
        <div class="chart-container">
            <h2>Claims by Domain</h2>
            <canvas id="domainChart"></canvas>
        </div>
        <div class="chart-container">
            <h2>Confidence Distribution</h2>
            <canvas id="confidenceChart"></canvas>
        </div>
    </div>

    <!-- Domain Breakdown Table -->
    <div class="section">
        <div class="section-title">Contributions by Domain</div>
        <div class="card">
            <table>
                <tr><th>Domain</th><th>Knowledge PRs</th><th>Total PRs</th><th>Top Contributors</th></tr>
                {domain_rows}
            </table>
        </div>
    </div>

    <!-- Stagnation Alerts -->
    {stagnation_html}
    """

    # NOTE(review): the script calls esc() and Chart, neither of which is
    # defined in this function; presumably the shared page template provides
    # them. Confirm in shared_ui.
    scripts = f"""<script>
// --- Herfindahl index ---
fetch('/api/herfindahl?days=30')
    .then(r => r.json())
    .then(data => {{
        const container = document.getElementById('herfindahl-container');
        const statusColor = data.status === 'diverse' ? 'green' : data.status === 'moderate' ? 'yellow' : 'red';
        let domainsHtml = data.domains.map(d =>
            '<div style="display:flex;justify-content:space-between;padding:4px 0;border-bottom:1px solid #21262d">' +
            '<span>' + esc(d.domain) + '</span>' +
            '<span style="color:#8b949e">' + d.count + ' (' + (d.share * 100).toFixed(1) + '%)</span></div>'
        ).join('');
        container.innerHTML =
            '<div class="value ' + statusColor + '">' + data.hhi.toFixed(4) + '</div>' +
            '<div class="detail">' + data.status + ' · ' + data.total_merged + ' merged (30d)</div>' +
            '<div style="margin-top:12px;text-align:left">' + domainsHtml + '</div>';
    }}).catch(() => {{
        document.getElementById('herfindahl-container').innerHTML =
            '<div class="label" style="color:#f85149">Failed to load domain concentration</div>';
    }});

// --- Extraction yield by domain ---
fetch('/api/extraction-yield-by-domain?days=30')
    .then(r => r.json())
    .then(data => {{
        const container = document.getElementById('yield-domain-container');
        if (!data.domains || data.domains.length === 0) {{
            container.innerHTML = '<div style="color:#8b949e;text-align:center;padding:16px">No yield data</div>';
            return;
        }}
        let html = '<table><tr><th>Domain</th><th>PRs</th><th>Merged</th><th>Yield</th></tr>';
        data.domains.forEach(d => {{
            const yieldColor = d.yield >= 0.5 ? 'green' : d.yield >= 0.3 ? 'yellow' : 'red';
            html += '<tr><td>' + esc(d.domain) + '</td><td>' + d.total_prs + '</td>' +
                '<td>' + d.merged + '</td><td class="' + yieldColor + '">' + (d.yield * 100).toFixed(1) + '%</td></tr>';
        }});
        html += '</table>';
        container.innerHTML = html;
    }}).catch(() => {{
        document.getElementById('yield-domain-container').innerHTML =
            '<div style="color:#f85149;text-align:center;padding:16px">Failed to load yield data</div>';
    }});

// --- Domain distribution chart ---
const domainData = {domain_json};
const domainLabels = Object.keys(domainData);
const domainValues = Object.values(domainData);
if (domainLabels.length > 0) {{
    const colors = ['#58a6ff', '#3fb950', '#d29922', '#f0883e', '#bc8cff', '#f85149', '#8b949e', '#ec4899'];
    new Chart(document.getElementById('domainChart'), {{
        type: 'doughnut',
        data: {{
            labels: domainLabels,
            datasets: [{{ data: domainValues, backgroundColor: domainLabels.map((_, i) => colors[i % colors.length]), borderColor: '#161b22', borderWidth: 2 }}],
        }},
        options: {{
            responsive: true,
            plugins: {{ legend: {{ position: 'right', labels: {{ boxWidth: 12, font: {{ size: 11 }} }} }} }},
        }},
    }});
}}

// --- Confidence distribution chart ---
const confData = {conf_json};
const confLabels = Object.keys(confData);
const confValues = Object.values(confData);
if (confLabels.length > 0) {{
    const confColors = {{ 'proven': '#3fb950', 'likely': '#58a6ff', 'experimental': '#d29922', 'speculative': '#f85149', 'unknown': '#8b949e' }};
    new Chart(document.getElementById('confidenceChart'), {{
        type: 'bar',
        data: {{
            labels: confLabels,
            datasets: [{{ data: confValues, backgroundColor: confLabels.map(l => confColors[l] || '#8b949e') }}],
        }},
        options: {{
            responsive: true,
            plugins: {{ legend: {{ display: false }} }},
            scales: {{
                y: {{ title: {{ display: true, text: 'Claims' }}, min: 0 }},
                x: {{ grid: {{ display: false }} }},
            }},
        }},
    }});
}}
</script>"""

    return render_page(
        title="Knowledge Health",
        subtitle="What do we know and how good is it?",
        active_path="/health",
        body_html=body,
        scripts=scripts,
        timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
    )
|
||||
464
diagnostics/dashboard_ops.py
Normal file
464
diagnostics/dashboard_ops.py
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
"""Page 1: Pipeline Operations — "Is the machine running?"
|
||||
|
||||
Renders: queue depth, throughput, error rate, stage flow, breakers,
|
||||
funnel, rejection reasons, fix cycle, time-series charts.
|
||||
|
||||
All data comes from existing endpoints: /api/metrics, /api/snapshots,
|
||||
/api/stage-times, /api/alerts, /api/fix-rates.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from shared_ui import render_page
|
||||
|
||||
|
||||
def render_ops_page(metrics: dict, snapshots: list, changes: list,
                    vital_signs: dict, now: datetime) -> str:
    """Render the Pipeline Operations page.

    Args:
        metrics: Current pipeline metrics. Keys read here: ``status_map``,
            ``approval_rate``, ``fix_rate``, ``rejection_reasons``,
            ``breakers``, ``throughput_1h``, ``approved_24h``,
            ``evaluated_24h``, ``fix_succeeded``, ``fix_attempted`` and
            ``median_ttm_minutes``.
        snapshots: Time-series rows; each must have ``ts`` and may carry the
            throughput / approval / backlog / rejection counters plotted below.
        changes: Change events (``ts``, ``type``, optional ``to``) drawn as
            vertical annotation lines on the charts.
        vital_signs: Must contain ``review_throughput`` and ``funnel``;
            ``queue_staleness`` is optional.
        now: Render time. It is formatted with a literal "UTC" suffix, so it is
            presumably expected to be a UTC datetime — confirm with callers.

    Returns:
        The full page HTML as produced by ``render_page``.
    """
    # Function-scope import so this block does not depend on a file-level edit.
    from html import escape as escape_html

    def js(value) -> str:
        """Serialise *value* as a JS literal that is safe inside <script>.

        Escaping "<" prevents data containing "</script>" or "<!--" from
        terminating or corrupting the inline script element.
        """
        return json.dumps(value).replace("<", "\\u003c")

    # --- Prepare chart data ---
    timestamps = [s["ts"] for s in snapshots]
    throughput_data = [s.get("throughput_1h", 0) for s in snapshots]
    approval_data = [(s.get("approval_rate") or 0) * 100 for s in snapshots]
    open_prs_data = [s.get("open_prs", 0) for s in snapshots]
    merged_data = [s.get("merged_total", 0) for s in snapshots]

    rej_wiki = [s.get("rejection_broken_wiki_links", 0) for s in snapshots]
    rej_schema = [s.get("rejection_frontmatter_schema", 0) for s in snapshots]
    rej_dup = [s.get("rejection_near_duplicate", 0) for s in snapshots]
    rej_conf = [s.get("rejection_confidence", 0) for s in snapshots]
    rej_other = [s.get("rejection_other", 0) for s in snapshots]

    # origin_agent/origin_human removed — replaced by /api/growth chart

    annotations = [
        {
            "type": "line", "xMin": c["ts"], "xMax": c["ts"],
            "borderColor": "#d29922" if c["type"] == "prompt" else "#58a6ff",
            "borderWidth": 1, "borderDash": [4, 4],
            "label": {"display": True, "content": f"{c['type']}: {c.get('to', '?')}",
                      "position": "start", "backgroundColor": "#161b22",
                      "color": "#8b949e", "font": {"size": 10}},
        }
        for c in changes
    ]

    # --- Status helpers ---
    sm = metrics["status_map"]
    # Rates may be None when nothing was evaluated; treat that as 0 so the
    # comparisons and percent formatting below cannot raise.
    ar = metrics["approval_rate"] or 0
    fix_rate = metrics["fix_rate"] or 0
    ar_color = "green" if ar > 0.5 else ("yellow" if ar > 0.2 else "red")
    fr_color = "green" if fix_rate > 0.3 else ("yellow" if fix_rate > 0.1 else "red")

    vs_review = vital_signs["review_throughput"]
    vs_status_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(vs_review["status"], "yellow")

    # Only show the "min" unit when there is a value to attach it to.
    ttm = metrics["median_ttm_minutes"]
    if ttm:
        ttm_html = f'{ttm:.0f}<span style="font-size:14px;color:#8b949e">min</span>'
    else:
        ttm_html = "—"

    # --- Rejection reasons table ---
    # Tags come from stored data, so escape them before embedding in HTML.
    reason_rows = "".join(
        f'<tr><td><code>{escape_html(str(r["tag"]))}</code></td><td>{r["unique_prs"]}</td>'
        f'<td style="color:#8b949e">{r["count"]}</td></tr>'
        for r in metrics["rejection_reasons"]
    )
    if not reason_rows:
        reason_rows = "<tr><td colspan='3' style='color:#8b949e'>No rejections in 24h</td></tr>"

    # --- Breaker rows ---
    breaker_parts = []
    for name, info in metrics["breakers"].items():
        state = info["state"]
        color = "green" if state == "closed" else ("red" if state == "open" else "yellow")
        age = f'{info["age_s"]}s ago' if "age_s" in info else "-"
        breaker_parts.append(
            f'<tr><td>{escape_html(str(name))}</td>'
            f'<td class="{color}">{escape_html(str(state))}</td>'
            f'<td>{info["failures"]}</td><td>{age}</td></tr>'
        )
    breaker_rows = "".join(breaker_parts)
    if not breaker_rows:
        breaker_rows = "<tr><td colspan='4' style='color:#8b949e'>No breaker data</td></tr>"

    # --- Funnel ---
    funnel = vital_signs["funnel"]

    # --- Queue staleness ---
    qs = vital_signs.get("queue_staleness", {})
    stale_count = qs.get("stale_count", 0)
    stale_status = qs.get("status", "healthy")
    stale_color = {"healthy": "green", "warning": "yellow", "critical": "red"}.get(stale_status, "")
    oldest_note = f'(oldest: {qs.get("oldest_age_days", "?")}d)' if stale_count > 0 else ""

    body = f"""
<!-- Hero Cards -->
<div class="grid">
    <div class="card">
        <div class="label">Throughput</div>
        <div class="value">{metrics["throughput_1h"]}<span style="font-size:14px;color:#8b949e">/hr</span></div>
        <div class="detail">merged last hour</div>
    </div>
    <div class="card">
        <div class="label">Approval Rate (24h)</div>
        <div class="value {ar_color}">{ar:.1%}</div>
        <div class="detail">{metrics["approved_24h"]}/{metrics["evaluated_24h"]} evaluated</div>
    </div>
    <div class="card">
        <div class="label">Review Backlog</div>
        <div class="value {vs_status_color}">{vs_review["backlog"]}</div>
        <div class="detail">{vs_review["open_prs"]} open + {vs_review["reviewing_prs"]} reviewing + {vs_review["approved_waiting"]} approved</div>
    </div>
    <div class="card">
        <div class="label">Merged Total</div>
        <div class="value green">{sm.get("merged", 0)}</div>
        <div class="detail">{sm.get("closed", 0)} closed</div>
    </div>
    <div class="card">
        <div class="label">Fix Success</div>
        <div class="value {fr_color}">{fix_rate:.1%}</div>
        <div class="detail">{metrics["fix_succeeded"]}/{metrics["fix_attempted"]} fixed</div>
    </div>
    <div class="card">
        <div class="label">Time to Merge</div>
        <div class="value">{ttm_html}</div>
        <div class="detail">median (24h)</div>
    </div>
</div>

<!-- Alert Banner (loaded via JS) -->
<div id="alert-banner"></div>

<!-- Pipeline Funnel -->
<div class="section">
    <div class="section-title">Pipeline Funnel</div>
    <div class="funnel">
        <div class="funnel-step"><div class="num">{funnel["sources_total"]}</div><div class="lbl">Sources</div></div>
        <div class="funnel-arrow">→</div>
        <div class="funnel-step"><div class="num" style="color:#f0883e">{funnel["sources_queued"]}</div><div class="lbl">In Queue</div></div>
        <div class="funnel-arrow">→</div>
        <div class="funnel-step"><div class="num">{funnel["sources_extracted"]}</div><div class="lbl">Extracted</div></div>
        <div class="funnel-arrow">→</div>
        <div class="funnel-step"><div class="num">{funnel["prs_total"]}</div><div class="lbl">PRs Created</div></div>
        <div class="funnel-arrow">→</div>
        <div class="funnel-step"><div class="num green">{funnel["prs_merged"]}</div><div class="lbl">Merged</div></div>
        <div class="funnel-arrow">→</div>
        <div class="funnel-step"><div class="num blue">{funnel["conversion_rate"]:.1%}</div><div class="lbl">Conversion</div></div>
    </div>
    <div style="margin-top:8px;font-size:12px;color:#8b949e">
        Queue staleness: <span class="{stale_color}">{stale_count} stale</span>
        {oldest_note}
    </div>
</div>

<!-- Stage Dwell Times (loaded via JS) -->
<div class="section">
    <div class="section-title">Stage Dwell Times</div>
    <div id="stage-times-container" class="grid"></div>
</div>

<!-- Charts -->
<div id="no-chart-data" class="card" style="text-align:center;padding:40px;margin:16px 0;display:none">
    <p style="color:#8b949e">No time-series data yet.</p>
</div>
<div id="chart-section">
    <div class="row">
        <div class="chart-container">
            <h2>Throughput & Approval Rate</h2>
            <canvas id="throughputChart"></canvas>
        </div>
        <div class="chart-container">
            <h2>Rejection Reasons Over Time</h2>
            <canvas id="rejectionChart"></canvas>
        </div>
    </div>
    <div class="row">
        <div class="chart-container">
            <h2>PR Backlog</h2>
            <canvas id="backlogChart"></canvas>
        </div>
        <div class="chart-container">
            <h2>Cumulative Growth</h2>
            <canvas id="growthChart"></canvas>
        </div>
    </div>
</div>

<!-- PR Trace Lookup -->
<div class="section">
    <div class="section-title">PR Trace Lookup</div>
    <div class="card">
        <div style="display:flex;gap:8px;align-items:center">
            <input id="trace-pr-input" type="number" placeholder="Enter PR number"
                   style="background:#0d1117;border:1px solid #30363d;color:#c9d1d9;padding:8px 12px;border-radius:6px;width:180px;font-size:14px">
            <button onclick="loadTrace()" style="background:#238636;color:#fff;border:none;padding:8px 16px;border-radius:6px;cursor:pointer;font-size:13px;font-weight:600">Trace</button>
        </div>
        <div id="trace-result" style="margin-top:12px"></div>
    </div>
</div>

<!-- Tables -->
<div class="row">
    <div class="section">
        <div class="section-title">Top Rejection Reasons (24h)</div>
        <div class="card">
            <table>
                <tr><th>Issue</th><th>PRs</th><th style="color:#8b949e">Events</th></tr>
                {reason_rows}
            </table>
        </div>
    </div>
    <div class="section">
        <div class="section-title">Circuit Breakers</div>
        <div class="card">
            <table>
                <tr><th>Stage</th><th>State</th><th>Failures</th><th>Last Success</th></tr>
                {breaker_rows}
            </table>
        </div>
    </div>
</div>
"""

    # NOTE(review): the script relies on a global JS `esc()` HTML-escaping
    # helper that is not defined in this function — presumably supplied by
    # render_page; confirm.
    scripts = f"""<script>
const timestamps = {js(timestamps)};

// --- Alerts banner ---
fetch('/api/alerts')
    .then(r => r.json())
    .then(data => {{
        if (data.alerts && data.alerts.length > 0) {{
            const critical = data.alerts.filter(a => a.severity === 'critical');
            const warning = data.alerts.filter(a => a.severity === 'warning');
            let html = '';
            if (critical.length > 0) {{
                html += '<div class="alert-banner alert-critical">' +
                    critical.map(a => '!! ' + esc(a.title)).join('<br>') + '</div>';
            }}
            if (warning.length > 0) {{
                html += '<div class="alert-banner alert-warning">' +
                    warning.map(a => '! ' + esc(a.title)).join('<br>') + '</div>';
            }}
            document.getElementById('alert-banner').innerHTML = html;
        }}
    }}).catch(() => {{}});

// --- Stage dwell times ---
fetch('/api/stage-times?hours=24')
    .then(r => r.json())
    .then(data => {{
        const container = document.getElementById('stage-times-container');
        const stages = data.stages || {{}};
        if (Object.keys(stages).length === 0) {{
            container.innerHTML = '<div class="card" style="grid-column:1/-1;text-align:center;color:#8b949e">No stage timing data yet</div>';
            return;
        }}
        let html = '';
        for (const [label, info] of Object.entries(stages)) {{
            const color = info.median_minutes < 5 ? 'green' : info.median_minutes < 30 ? 'yellow' : 'red';
            html += '<div class="card"><div class="label">' + esc(label) + '</div>' +
                '<div class="value ' + color + '">' + info.median_minutes.toFixed(1) + '<span style="font-size:14px;color:#8b949e">min</span></div>' +
                '<div class="detail">median (' + info.count + ' PRs)' +
                (info.p90_minutes ? ' · p90: ' + info.p90_minutes.toFixed(1) + 'min' : '') +
                '</div></div>';
        }}
        container.innerHTML = html;
    }}).catch(() => {{}});

// --- Time-series charts ---
if (timestamps.length === 0) {{
    document.getElementById('chart-section').style.display = 'none';
    document.getElementById('no-chart-data').style.display = 'block';
}} else {{

    const throughputData = {js(throughput_data)};
    const approvalData = {js(approval_data)};
    const openPrsData = {js(open_prs_data)};
    const mergedData = {js(merged_data)};
    const rejWiki = {js(rej_wiki)};
    const rejSchema = {js(rej_schema)};
    const rejDup = {js(rej_dup)};
    const rejConf = {js(rej_conf)};
    const rejOther = {js(rej_other)};
    const annotations = {js(annotations)};

    new Chart(document.getElementById('throughputChart'), {{
        type: 'line',
        data: {{
            labels: timestamps,
            datasets: [
                {{ label: 'Throughput/hr', data: throughputData, borderColor: '#58a6ff', backgroundColor: 'rgba(88,166,255,0.1)', fill: true, tension: 0.3, yAxisID: 'y', pointRadius: 1 }},
                {{ label: 'Approval %', data: approvalData, borderColor: '#3fb950', borderDash: [4,2], tension: 0.3, yAxisID: 'y1', pointRadius: 1 }},
            ],
        }},
        options: {{
            responsive: true,
            interaction: {{ mode: 'index', intersect: false }},
            scales: {{
                x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
                y: {{ position: 'left', title: {{ display: true, text: 'PRs/hr' }}, min: 0 }},
                y1: {{ position: 'right', title: {{ display: true, text: 'Approval %' }}, min: 0, max: 100, grid: {{ drawOnChartArea: false }} }},
            }},
            plugins: {{ annotation: {{ annotations }}, legend: {{ labels: {{ boxWidth: 12 }} }} }},
        }},
    }});

    new Chart(document.getElementById('rejectionChart'), {{
        type: 'line',
        data: {{
            labels: timestamps,
            datasets: [
                {{ label: 'Wiki Links', data: rejWiki, borderColor: '#f85149', backgroundColor: 'rgba(248,81,73,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
                {{ label: 'Schema', data: rejSchema, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
                {{ label: 'Duplicate', data: rejDup, borderColor: '#8b949e', backgroundColor: 'rgba(139,148,158,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
                {{ label: 'Confidence', data: rejConf, borderColor: '#bc8cff', backgroundColor: 'rgba(188,140,255,0.2)', fill: true, tension: 0.3, pointRadius: 0 }},
                {{ label: 'Other', data: rejOther, borderColor: '#6e7681', backgroundColor: 'rgba(110,118,129,0.15)', fill: true, tension: 0.3, pointRadius: 0 }},
            ],
        }},
        options: {{
            responsive: true,
            scales: {{
                x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
                y: {{ stacked: true, min: 0, title: {{ display: true, text: 'Count (24h)' }} }},
            }},
            plugins: {{ annotation: {{ annotations }}, legend: {{ labels: {{ boxWidth: 12 }} }} }},
        }},
    }});

    new Chart(document.getElementById('backlogChart'), {{
        type: 'line',
        data: {{
            labels: timestamps,
            datasets: [
                {{ label: 'Open PRs', data: openPrsData, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.15)', fill: true, tension: 0.3, pointRadius: 1 }},
                {{ label: 'Merged (total)', data: mergedData, borderColor: '#3fb950', tension: 0.3, pointRadius: 1 }},
            ],
        }},
        options: {{
            responsive: true,
            scales: {{
                x: {{ type: 'time', time: {{ unit: 'hour', displayFormats: {{ hour: 'MMM d HH:mm' }} }}, grid: {{ display: false }} }},
                y: {{ min: 0, title: {{ display: true, text: 'PRs' }} }},
            }},
            plugins: {{ legend: {{ labels: {{ boxWidth: 12 }} }} }},
        }},
    }});

}} // end if timestamps

// Growth chart loaded async from /api/growth (independent of snapshots)
fetch('/api/growth?days=90')
    .then(r => r.json())
    .then(data => {{
        if (!data.dates || data.dates.length === 0) return;
        new Chart(document.getElementById('growthChart'), {{
            type: 'line',
            data: {{
                labels: data.dates,
                datasets: [
                    {{ label: 'Sources', data: data.sources, borderColor: '#58a6ff', backgroundColor: 'rgba(88,166,255,0.1)', fill: true, tension: 0.3, pointRadius: 1 }},
                    {{ label: 'PRs Created', data: data.prs, borderColor: '#d29922', backgroundColor: 'rgba(210,153,34,0.1)', fill: false, tension: 0.3, pointRadius: 1 }},
                    {{ label: 'Merged', data: data.merged, borderColor: '#3fb950', backgroundColor: 'rgba(63,185,80,0.1)', fill: false, tension: 0.3, pointRadius: 1 }},
                ],
            }},
            options: {{
                responsive: true,
                interaction: {{ mode: 'index', intersect: false }},
                scales: {{
                    x: {{ type: 'time', time: {{ unit: 'day', displayFormats: {{ day: 'MMM d' }} }}, grid: {{ display: false }} }},
                    y: {{ min: 0, title: {{ display: true, text: 'Cumulative Count' }} }},
                }},
                plugins: {{ legend: {{ labels: {{ boxWidth: 12 }} }} }},
            }},
        }});
    }}).catch(() => {{}});

// --- PR Trace Lookup ---
document.getElementById('trace-pr-input').addEventListener('keydown', e => {{ if (e.key === 'Enter') loadTrace(); }});

// Toggle the <pre> that immediately follows a "[json]" link. Using the
// sibling relationship avoids generating (possibly colliding) element ids.
function toggleTraceDetail(link) {{
    const pre = link.nextElementSibling;
    if (pre) pre.style.display = pre.style.display === 'none' ? 'block' : 'none';
}}

function loadTrace() {{
    const pr = document.getElementById('trace-pr-input').value.trim();
    const container = document.getElementById('trace-result');
    if (!pr) {{ container.innerHTML = '<p style="color:#8b949e">Enter a PR number</p>'; return; }}
    container.innerHTML = '<p style="color:#8b949e">Loading...</p>';

    fetch('/api/trace/' + encodeURIComponent(pr))
        .then(r => r.json())
        .then(data => {{
            // A response without a timeline must not throw on `.length`.
            const timeline = data.timeline || [];
            if (!data.pr && timeline.length === 0) {{
                container.innerHTML = '<p style="color:#8b949e">No trace found for PR ' + esc(pr) + '</p>';
                return;
            }}

            const stageColors = {{
                ingest: '#58a6ff', validate: '#d29922', evaluate: '#f0883e',
                merge: '#3fb950', cascade: '#bc8cff', cross_domain: '#79c0ff'
            }};

            let html = '';

            // PR summary
            if (data.pr) {{
                const p = data.pr;
                html += '<div style="margin-bottom:12px;padding:8px 12px;background:#21262d;border-radius:6px;font-size:13px">' +
                    '<strong>PR #' + esc(String(p.number)) + '</strong> · ' +
                    '<span style="color:' + (p.status === 'merged' ? '#3fb950' : '#d29922') + '">' + esc(p.status) + '</span>' +
                    ' · ' + esc(p.domain || 'general') +
                    ' · ' + esc(p.agent || '?') +
                    ' · ' + esc(p.tier || '?') +
                    ' · created ' + esc(p.created_at || '') +
                    (p.merged_at ? ' · merged ' + esc(p.merged_at) : '') +
                    '</div>';
            }}

            // Timeline
            if (timeline.length > 0) {{
                html += '<div style="font-size:12px;font-weight:600;color:#8b949e;margin-bottom:6px;text-transform:uppercase">Timeline</div>';
                html += '<table style="font-size:12px"><tr><th>Time</th><th>Stage</th><th>Event</th><th>Details</th></tr>';
                for (const evt of timeline) {{
                    const sc = stageColors[evt.stage] || '#8b949e';
                    const detail = evt.detail || {{}};
                    // Show key fields inline, expandable full JSON
                    const keyFields = [];
                    // [].concat tolerates `issues` being a single value instead of an array.
                    if (detail.issues) keyFields.push('issues: ' + [].concat(detail.issues).join(', '));
                    if (detail.agent) keyFields.push('agent: ' + detail.agent);
                    if (detail.tier) keyFields.push('tier: ' + detail.tier);
                    if (detail.leo) keyFields.push('leo: ' + detail.leo);
                    if (detail.domain) keyFields.push('domain: ' + detail.domain);
                    if (detail.pass != null) keyFields.push('pass: ' + detail.pass);
                    if (detail.attempt) keyFields.push('attempt: ' + detail.attempt);
                    const summary = keyFields.length > 0 ? esc(keyFields.join(' | ')) : '';
                    const fullJson = JSON.stringify(detail, null, 2);

                    html += '<tr>' +
                        '<td style="white-space:nowrap;color:#8b949e">' + esc(evt.timestamp) + '</td>' +
                        '<td><span style="color:' + sc + ';font-weight:600">' + esc(evt.stage) + '</span></td>' +
                        '<td>' + esc(evt.event) + '</td>' +
                        '<td>' + summary +
                        (Object.keys(detail).length > 0
                            ? ' <a style="color:#58a6ff;cursor:pointer;font-size:11px" onclick="toggleTraceDetail(this)">[json]</a>' +
                              '<pre style="display:none;margin-top:4px;background:#0d1117;padding:6px;border-radius:4px;font-size:11px;overflow-x:auto;max-width:500px">' + esc(fullJson) + '</pre>'
                            : '') +
                        '</td></tr>';
                }}
                html += '</table>';
            }}

            // Reviews
            if (data.reviews && data.reviews.length > 0) {{
                html += '<div style="font-size:12px;font-weight:600;color:#8b949e;margin:12px 0 6px;text-transform:uppercase">Reviews</div>';
                html += '<table style="font-size:12px"><tr><th>Claim</th><th>Outcome</th><th>Reviewer</th><th>Reason</th></tr>';
                for (const rv of data.reviews) {{
                    const outColor = rv.outcome === 'approved' ? '#3fb950' : rv.outcome === 'rejected' ? '#f85149' : '#d29922';
                    html += '<tr>' +
                        '<td style="max-width:250px;overflow:hidden;text-overflow:ellipsis">' + esc(rv.claim_path || '-') + '</td>' +
                        '<td><span class="badge" style="background:' + outColor + '33;color:' + outColor + '">' + esc(rv.outcome || '-') + '</span></td>' +
                        '<td>' + esc(rv.reviewer || '-') + '</td>' +
                        '<td>' + esc(rv.rejection_reason || '') + '</td></tr>';
                }}
                html += '</table>';
            }}

            container.innerHTML = html;
        }})
        .catch(err => {{
            container.innerHTML = '<p style="color:#f85149">Error: ' + esc(err.message) + '</p>';
        }});
}}
</script>"""

    return render_page(
        title="Pipeline Operations",
        subtitle="Is the machine running?",
        active_path="/ops",
        body_html=body,
        scripts=scripts,
        timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
    )
|
||||
408
diagnostics/dashboard_portfolio.py
Normal file
408
diagnostics/dashboard_portfolio.py
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
"""Portfolio dashboard — fixes empty chart by:
|
||||
1. Computing NAV server-side in the history API (not client-side from nulls)
|
||||
2. Only returning dates with valid NAV data
|
||||
3. Showing data points when sparse
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import logging
|
||||
from html import escape as esc
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from aiohttp import web
|
||||
from shared_ui import render_page
|
||||
|
||||
logger = logging.getLogger("argus.portfolio")
|
||||
|
||||
CSS = """
|
||||
.hero-chart { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 20px; margin-bottom: 20px; }
|
||||
.hero-chart h2 { color: #c9d1d9; font-size: 18px; margin-bottom: 12px; }
|
||||
.range-btns { display: flex; gap: 4px; margin-bottom: 12px; }
|
||||
.range-btn { background: #21262d; border: 1px solid #30363d; color: #8b949e; padding: 5px 14px;
|
||||
border-radius: 4px; cursor: pointer; font-size: 12px; }
|
||||
.range-btn.active { background: #1f6feb33; border-color: #58a6ff; color: #58a6ff; }
|
||||
.ptable-wrap { overflow-x: auto; margin-top: 20px; }
|
||||
.ptable { width: 100%; border-collapse: collapse; font-size: 13px; }
|
||||
.ptable th { background: #161b22; color: #8b949e; font-size: 11px; text-transform: uppercase;
|
||||
letter-spacing: 0.5px; padding: 10px 12px; text-align: right; border-bottom: 1px solid #30363d;
|
||||
cursor: pointer; user-select: none; white-space: nowrap; }
|
||||
.ptable th:first-child { text-align: left; position: sticky; left: 0; background: #161b22; z-index: 1; }
|
||||
.ptable th:hover { color: #c9d1d9; }
|
||||
.ptable th.sorted-asc::after { content: ' \\25B2'; font-size: 9px; }
|
||||
.ptable th.sorted-desc::after { content: ' \\25BC'; font-size: 9px; }
|
||||
.ptable td { padding: 10px 12px; text-align: right; border-bottom: 1px solid #21262d; color: #c9d1d9; }
|
||||
.ptable td:first-child { text-align: left; position: sticky; left: 0; background: #0d1117; z-index: 1; font-weight: 600; }
|
||||
.ptable tr:hover td { background: #161b22; }
|
||||
.ptable tr:hover td:first-child { background: #161b22; }
|
||||
.summary-row td { font-weight: 700; border-top: 2px solid #30363d; background: #161b22 !important; }
|
||||
.premium { color: #f85149; }
|
||||
.discount { color: #3fb950; }
|
||||
.near-nav { color: #d29922; }
|
||||
"""
|
||||
|
||||
|
||||
def _fmt_usd(v):
|
||||
if v is None:
|
||||
return '\u2014'
|
||||
if abs(v) >= 1_000_000:
|
||||
return f'${v / 1_000_000:.1f}M'
|
||||
if abs(v) >= 1_000:
|
||||
return f'${v / 1_000:.0f}K'
|
||||
return f'${v:,.0f}'
|
||||
|
||||
|
||||
def _fmt_price(v):
|
||||
if v is None:
|
||||
return '\u2014'
|
||||
if v >= 100:
|
||||
return f'${v:,.0f}'
|
||||
if v >= 1:
|
||||
return f'${v:.2f}'
|
||||
if v >= 0.01:
|
||||
return f'${v:.4f}'
|
||||
return f'${v:.6f}'
|
||||
|
||||
|
||||
def _fmt_ratio(v):
|
||||
if v is None or v == 0:
|
||||
return '\u2014'
|
||||
return f'{v:.2f}x'
|
||||
|
||||
|
||||
def _ratio_class(v):
|
||||
if v is None or v == 0:
|
||||
return ''
|
||||
if v > 1.5:
|
||||
return 'premium'
|
||||
if v < 0.9:
|
||||
return 'discount'
|
||||
if v <= 1.1:
|
||||
return 'near-nav'
|
||||
return ''
|
||||
|
||||
|
||||
def render_portfolio_page(coins: list[dict], now: datetime) -> str:
    """Render the portfolio page: Price/NAV history chart plus a coin table.

    Args:
        coins: One dict per coin. Keys read here: ``name``, ``ticker``,
            ``price_usd``, ``nav_per_token``, ``price_nav_ratio``,
            ``treasury_usd`` and ``market_cap_usd``; all are optional.
        now: Render time, formatted with a literal "UTC" suffix.

    Returns:
        The full page HTML produced by ``render_page``. With no coins, a
        placeholder page without chart or scripts is returned.
    """
    if not coins:
        body = '<div style="padding:40px;text-align:center;color:#8b949e;">No coin data yet.</div>'
        return render_page("Portfolio", "Ownership coin portfolio", "/portfolio", body,
                           extra_css=CSS, timestamp=now.strftime("%Y-%m-%d %H:%M UTC"))

    def sort_attr(value):
        """Build a data-sort attribute carrying the raw, unabbreviated value.

        The visible text is abbreviated ("$1.2M", "$500K"), so the client-side
        sort must not parse it; it reads this attribute instead.
        """
        if value is None:
            return ''
        return f' data-sort="{esc(str(value))}"'

    total_mcap = sum(c.get('market_cap_usd') or 0 for c in coins)
    total_treasury = sum(c.get('treasury_usd') or 0 for c in coins)

    hero_chart = """
<div class="hero-chart">
    <h2>Price / NAV per Token</h2>
    <div class="range-btns">
        <button class="range-btn" onclick="setRange(this, 30)">30d</button>
        <button class="range-btn active" onclick="setRange(this, 90)">90d</button>
        <button class="range-btn" onclick="setRange(this, 180)">180d</button>
        <button class="range-btn" onclick="setRange(this, 365)">All</button>
    </div>
    <canvas id="ratio-chart" height="320" style="max-height:320px"></canvas>
</div>
"""

    header = """<div class="ptable-wrap"><table class="ptable" id="coin-table">
<thead><tr>
    <th data-col="name">Coin</th>
    <th data-col="price">Price</th>
    <th data-col="nav">NAV / Token</th>
    <th data-col="ratio">Price / NAV</th>
    <th data-col="treasury">Treasury</th>
    <th data-col="mcap">Market Cap</th>
</tr></thead><tbody>"""

    row_parts = []
    for c in coins:
        # `or '?'` also covers a name key that is present but None, which
        # would otherwise crash html.escape.
        name = c.get('name') or '?'
        ticker = c.get('ticker', '')
        price = c.get('price_usd')
        nav = c.get('nav_per_token')
        ratio = c.get('price_nav_ratio')
        treasury = c.get('treasury_usd')
        mcap = c.get('market_cap_usd')

        label = esc(name)
        if ticker:
            label += f' <span style="color:#8b949e;font-size:11px;">{esc(ticker)}</span>'

        row_parts.append(f"""<tr>
    <td>{label}</td>
    <td{sort_attr(price)}>{_fmt_price(price)}</td>
    <td{sort_attr(nav)}>{_fmt_price(nav)}</td>
    <td class="{_ratio_class(ratio)}"{sort_attr(ratio)}>{_fmt_ratio(ratio)}</td>
    <td{sort_attr(treasury)}>{_fmt_usd(treasury)}</td>
    <td{sort_attr(mcap)}>{_fmt_usd(mcap)}</td>
</tr>""")

    row_parts.append(f"""<tr class="summary-row">
    <td>Total ({len(coins)})</td>
    <td></td><td></td><td></td>
    <td>{_fmt_usd(total_treasury)}</td>
    <td>{_fmt_usd(total_mcap)}</td>
</tr>""")

    table = header + ''.join(row_parts) + '</tbody></table></div>'

    # Plain (non-f) string: braces below are literal JavaScript.
    scripts = """<script>
const COLORS = ['#58a6ff','#3fb950','#f0883e','#d29922','#f85149','#bc8cff','#39d353','#79c0ff','#ff7b72','#a5d6ff'];
let chart = null;

function setRange(btn, days) {
    document.querySelectorAll('.range-btn').forEach(b => b.classList.remove('active'));
    btn.classList.add('active');
    loadChart(days);
}

// Replace the chart with a centred text message (empty data or load error).
function showChartMessage(text) {
    if (chart) chart.destroy();
    chart = null;
    const ctx = document.getElementById('ratio-chart').getContext('2d');
    ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);
    ctx.fillStyle = '#8b949e';
    ctx.font = '14px sans-serif';
    ctx.textAlign = 'center';
    ctx.fillText(text, ctx.canvas.width / 2, 160);
}

function loadChart(days) {
    fetch('/api/portfolio/nav-ratios?days=' + days)
        .then(r => r.json())
        .then(data => {
            const dates = data.dates || [];
            const series = data.series || {};

            if (dates.length === 0) {
                showChartMessage('No NAV data yet — accumulating daily snapshots');
                return;
            }

            const sparse = dates.length <= 10;
            const datasets = [];
            let i = 0;
            for (const [name, ratios] of Object.entries(series)) {
                const hasData = ratios.some(v => v !== null);
                if (!hasData) { i++; continue; }
                datasets.push({
                    label: name,
                    data: ratios,
                    borderColor: COLORS[i % COLORS.length],
                    backgroundColor: COLORS[i % COLORS.length] + '33',
                    borderWidth: 2,
                    tension: 0.3,
                    spanGaps: true,
                    pointRadius: sparse ? 4 : 0,
                    pointHoverRadius: 6,
                    fill: false,
                });
                i++;
            }

            if (chart) chart.destroy();
            const ctx = document.getElementById('ratio-chart').getContext('2d');
            chart = new Chart(ctx, {
                type: 'line',
                data: { labels: dates, datasets },
                options: {
                    responsive: true,
                    maintainAspectRatio: false,
                    interaction: { mode: 'index', intersect: false },
                    plugins: {
                        legend: { labels: { color: '#8b949e', font: { size: 11 }, usePointStyle: true, boxWidth: 8 }, position: 'top' },
                        tooltip: { mode: 'index', intersect: false,
                            callbacks: { label: ctx => ctx.dataset.label + ': ' + (ctx.parsed.y != null ? ctx.parsed.y.toFixed(2) + 'x' : 'n/a') }
                        },
                        annotation: {
                            annotations: {
                                navLine: {
                                    type: 'line',
                                    yMin: 1, yMax: 1,
                                    borderColor: '#3fb95088',
                                    borderWidth: 2,
                                    borderDash: [6, 4],
                                    label: {
                                        display: true,
                                        content: '1.0x = NAV',
                                        position: 'end',
                                        backgroundColor: '#3fb95033',
                                        color: '#3fb950',
                                        font: { size: 10 },
                                    }
                                }
                            }
                        }
                    },
                    scales: {
                        x: { ticks: { color: '#8b949e', maxTicksLimit: 12 }, grid: { display: false } },
                        y: { ticks: { color: '#8b949e', callback: v => v.toFixed(1) + 'x' }, grid: { color: '#21262d' },
                             suggestedMin: 0 }
                    }
                }
            });
        })
        .catch(() => showChartMessage('Failed to load NAV data'));
}

// Table sorting
function sortTable(col) {
    const table = document.getElementById('coin-table');
    const tbody = table.querySelector('tbody');
    const rows = Array.from(tbody.querySelectorAll('tr:not(.summary-row)'));
    const summaryRow = tbody.querySelector('.summary-row');
    const th = table.querySelectorAll('th')[col];
    const isText = col === 0;
    const wasAsc = th.classList.contains('sorted-asc');
    const wasDesc = th.classList.contains('sorted-desc');
    // Toggle on repeat clicks; on first click text sorts A→Z and numbers
    // sort largest-first. The indicator always reflects the real order.
    const ascending = wasAsc ? false : wasDesc ? true : isText;
    table.querySelectorAll('th').forEach(h => h.classList.remove('sorted-asc','sorted-desc'));
    th.classList.add(ascending ? 'sorted-asc' : 'sorted-desc');

    // Numeric cells carry the raw value in data-sort; the visible text is
    // abbreviated ("$1.2M") and must not be parsed. Missing values sort lowest.
    const numeric = cell => {
        const n = parseFloat(cell.dataset.sort);
        return Number.isNaN(n) ? -Infinity : n;
    };
    const compare = (a, b) => {
        if (isText) return a.cells[col].textContent.trim().localeCompare(b.cells[col].textContent.trim());
        const na = numeric(a.cells[col]), nb = numeric(b.cells[col]);
        return na < nb ? -1 : na > nb ? 1 : 0;
    };
    rows.sort((a, b) => ascending ? compare(a, b) : compare(b, a));
    rows.forEach(r => tbody.appendChild(r));
    if (summaryRow) tbody.appendChild(summaryRow);
}
document.querySelectorAll('#coin-table th').forEach((th, i) => {
    th.addEventListener('click', () => sortTable(i));
});

loadChart(90);
</script>"""

    body = hero_chart + table
    return render_page("Portfolio", "Ownership coin portfolio", "/portfolio", body,
                       scripts=scripts, extra_css=CSS,
                       timestamp=now.strftime("%Y-%m-%d %H:%M UTC"))
|
||||
|
||||
|
||||
# ── API handlers ────────────────────────────────────────────────────────────
|
||||
|
||||
def _get_db(request):
|
||||
return request.app["_portfolio_conn"]()
|
||||
|
||||
|
||||
def _compute_nav(row):
    """Derive treasury, NAV per token and price/NAV ratio from a snapshot dict.

    Missing or ``None`` fields count as 0. NAV is 0 when the adjusted
    circulating supply is not positive, and the ratio is 0 when NAV is not
    positive.

    Returns:
        ``(treasury_usd, nav_per_token, price_nav_ratio)``
    """
    def _value(key):
        return row.get(key) or 0

    treasury = _value('treasury_multisig_usd') + _value('lp_usdc_total')
    supply = _value('adjusted_circulating_supply')
    price = _value('price_usd')

    nav_per_token = 0
    if supply > 0:
        nav_per_token = treasury / supply

    price_nav = 0
    if nav_per_token > 0:
        price_nav = price / nav_per_token

    return treasury, nav_per_token, price_nav
|
||||
|
||||
|
||||
async def handle_portfolio_page(request):
    """Serve the portfolio HTML page built from the most recent snapshot date.

    Each coin row is augmented with ``treasury_usd``, ``nav_per_token`` and
    ``price_nav_ratio`` before rendering. The connection is always closed.
    """
    conn = _get_db(request)
    try:
        latest_rows = conn.execute("""
            SELECT * FROM coin_snapshots
            WHERE snapshot_date = (SELECT MAX(snapshot_date) FROM coin_snapshots)
            ORDER BY market_cap_usd DESC
        """).fetchall()

        coins = []
        for snapshot in latest_rows:
            coin = dict(snapshot)
            treasury, nav_per_token, price_nav = _compute_nav(coin)
            coin.update(
                treasury_usd=treasury,
                nav_per_token=nav_per_token,
                price_nav_ratio=price_nav,
            )
            coins.append(coin)

        page = render_portfolio_page(coins, datetime.now(timezone.utc))
        return web.Response(text=page, content_type='text/html')
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_nav_ratios(request):
    """Server-side computed NAV ratios — only returns dates with valid data.

    Query parameters:
        days: look-back window in days (default 90, capped at 365).
            Non-integer or negative values fall back to the default.

    Returns:
        JSON ``{"dates": [...], "series": {name: [...]}}``. ``dates`` is the
        sorted list of snapshot dates on which at least one coin has a
        positive NAV and ratio. Each series list is aligned with ``dates``
        and holds ``None`` where that coin has no valid ratio.
    """
    conn = _get_db(request)
    try:
        try:
            days = int(request.query.get('days', '90'))
        except (ValueError, TypeError):
            days = 90
        if days < 0:
            # A negative window would be sent to SQLite as '--N days', which
            # is not a valid date modifier and silently matches nothing.
            # Treat it like any other invalid input.
            days = 90
        days = min(days, 365)

        rows = conn.execute("""
            SELECT name, snapshot_date, price_usd, treasury_multisig_usd,
                   lp_usdc_total, adjusted_circulating_supply
            FROM coin_snapshots
            WHERE snapshot_date >= date('now', ? || ' days')
              AND adjusted_circulating_supply IS NOT NULL
              AND adjusted_circulating_supply > 0
            ORDER BY name, snapshot_date
        """, (f'-{days}',)).fetchall()

        coin_ratios = {}
        all_dates = set()
        for r in rows:
            d = dict(r)
            _, nav, ratio = _compute_nav(d)
            # Skip snapshots without a usable treasury or price.
            if nav > 0 and ratio > 0:
                snapshot_date = d['snapshot_date']
                coin_ratios.setdefault(d['name'], {})[snapshot_date] = round(ratio, 3)
                all_dates.add(snapshot_date)

        sorted_dates = sorted(all_dates)
        # Align every coin on the shared date axis; gaps become None.
        series = {
            name: [date_map.get(day) for day in sorted_dates]
            for name, date_map in coin_ratios.items()
        }

        return web.json_response({
            'dates': sorted_dates,
            'series': series,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_portfolio_history(request):
    """Return raw snapshot rows for the look-back window, grouped by coin.

    Query parameters:
        days: look-back window in days (default 90, capped at 365).
            Non-integer or negative values fall back to the default.

    Returns:
        JSON ``{"history": {name: [row, ...]}}`` with each coin's rows in
        ascending ``snapshot_date`` order.
    """
    conn = _get_db(request)
    try:
        try:
            days = int(request.query.get('days', '90'))
        except (ValueError, TypeError):
            days = 90
        if days < 0:
            # A negative window would be sent to SQLite as '--N days', which
            # is not a valid date modifier and silently matches nothing.
            # Treat it like any other invalid input.
            days = 90
        days = min(days, 365)

        rows = conn.execute("""
            SELECT * FROM coin_snapshots
            WHERE snapshot_date >= date('now', ? || ' days')
            ORDER BY name, snapshot_date
        """, (f'-{days}',)).fetchall()

        history = {}
        for r in rows:
            d = dict(r)
            history.setdefault(d['name'], []).append(d)
        return web.json_response({'history': history})
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_portfolio_latest(request):
    """Return the most recent snapshot of every coin as JSON.

    Each coin dict is augmented with ``treasury_usd``, ``nav_per_token`` and
    ``price_nav_ratio``. ``date`` is the snapshot date of the first coin, or
    ``None`` when there are no rows.
    """
    conn = _get_db(request)
    try:
        latest_rows = conn.execute("""
            SELECT * FROM coin_snapshots
            WHERE snapshot_date = (SELECT MAX(snapshot_date) FROM coin_snapshots)
            ORDER BY market_cap_usd DESC
        """).fetchall()

        coins = []
        for snapshot in latest_rows:
            coin = dict(snapshot)
            treasury, nav_per_token, price_nav = _compute_nav(coin)
            coin.update(
                treasury_usd=treasury,
                nav_per_token=nav_per_token,
                price_nav_ratio=price_nav,
            )
            coins.append(coin)

        snapshot_date = coins[0]['snapshot_date'] if coins else None
        return web.json_response({'coins': coins, 'date': snapshot_date})
    finally:
        conn.close()
|
||||
|
||||
|
||||
def register_portfolio_routes(app, get_conn):
    """Attach the portfolio page and its JSON endpoints to ``app``.

    ``get_conn`` is stored on the app under ``"_portfolio_conn"``; the
    handlers call it to obtain a database connection.
    """
    app["_portfolio_conn"] = get_conn

    routes = (
        ("/portfolio", handle_portfolio_page),
        ("/api/portfolio/nav-ratios", handle_nav_ratios),
        ("/api/portfolio/history", handle_portfolio_history),
        ("/api/portfolio/latest", handle_portfolio_latest),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)
|
||||
564
diagnostics/dashboard_prs.py
Normal file
564
diagnostics/dashboard_prs.py
Normal file
|
|
@ -0,0 +1,564 @@
|
|||
"""PR Lifecycle dashboard — single-page view of every PR through the pipeline.
|
||||
|
||||
Sortable table: PR#, summary, claims, domain, outcome, evals, evaluator, cost, date.
|
||||
Click any row to expand: timeline, claim list, issues summary.
|
||||
Hero cards: total PRs, merge rate, median eval rounds, total claims, total cost.
|
||||
|
||||
Data sources: prs table, audit_log (eval rounds), review_records.
|
||||
Owner: Ship
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from shared_ui import render_page
|
||||
|
||||
|
||||
EXTRA_CSS = """
|
||||
.page-content { max-width: 1600px !important; }
|
||||
.filters { display: flex; gap: 12px; flex-wrap: wrap; margin-bottom: 16px; }
|
||||
.filters select, .filters input {
|
||||
background: #161b22; color: #c9d1d9; border: 1px solid #30363d;
|
||||
border-radius: 6px; padding: 6px 10px; font-size: 12px; }
|
||||
.filters select:focus, .filters input:focus { border-color: #58a6ff; outline: none; }
|
||||
.pr-table { width: 100%; border-collapse: collapse; font-size: 13px; table-layout: fixed; }
|
||||
.pr-table th:nth-child(1) { width: 50px; } /* PR# */
|
||||
.pr-table th:nth-child(2) { width: 30%; } /* Summary */
|
||||
.pr-table th:nth-child(3) { width: 50px; } /* Claims */
|
||||
.pr-table th:nth-child(4) { width: 12%; } /* Domain */
|
||||
.pr-table th:nth-child(5) { width: 10%; } /* Outcome */
|
||||
.pr-table th:nth-child(6) { width: 50px; } /* Evals */
|
||||
.pr-table th:nth-child(7) { width: 16%; } /* Evaluator */
|
||||
.pr-table th:nth-child(8) { width: 70px; } /* Cost */
|
||||
.pr-table th:nth-child(9) { width: 90px; } /* Date */
|
||||
.pr-table td { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; padding: 8px 6px; }
|
||||
.pr-table td:nth-child(2) { white-space: normal; overflow: visible; line-height: 1.4; }
|
||||
.pr-table th { cursor: pointer; user-select: none; position: relative; padding: 8px 18px 8px 6px; }
|
||||
.pr-table th:hover { color: #58a6ff; }
|
||||
.pr-table th .sort-arrow { position: absolute; right: 4px; top: 50%; transform: translateY(-50%); font-size: 10px; opacity: 0.5; }
|
||||
.pr-table th.sorted .sort-arrow { opacity: 1; color: #58a6ff; }
|
||||
.pr-table tr { cursor: pointer; transition: background 0.1s; }
|
||||
.pr-table tbody tr:hover { background: #161b22; }
|
||||
.pr-table .outcome-merged { color: #3fb950; }
|
||||
.pr-table .outcome-closed { color: #f85149; }
|
||||
.pr-table .outcome-open { color: #d29922; }
|
||||
.pr-table .tier-deep { color: #bc8cff; font-weight: 600; }
|
||||
.pr-table .tier-standard { color: #58a6ff; }
|
||||
.pr-table .tier-light { color: #8b949e; }
|
||||
.pr-table .pr-link { color: #58a6ff; text-decoration: none; }
|
||||
.pr-table .pr-link:hover { text-decoration: underline; }
|
||||
.pr-table td .summary-text { font-size: 12px; color: #c9d1d9; }
|
||||
.pr-table td .review-snippet { font-size: 11px; color: #f85149; margin-top: 2px; opacity: 0.8; }
|
||||
.pr-table td .model-tag { font-size: 9px; color: #6e7681; background: #21262d; border-radius: 3px; padding: 1px 4px; display: inline-block; margin: 1px 0; }
|
||||
.pr-table td .expand-chevron { display: inline-block; width: 12px; color: #484f58; font-size: 10px; transition: transform 0.2s; }
|
||||
.pr-table tr.expanded .expand-chevron { transform: rotate(90deg); color: #58a6ff; }
|
||||
.pr-table td .cost-val { font-size: 12px; color: #8b949e; }
|
||||
.pr-table td .claims-count { font-size: 13px; color: #c9d1d9; text-align: center; }
|
||||
.pr-table td .evals-count { font-size: 13px; text-align: center; }
|
||||
.trace-panel { background: #0d1117; border: 1px solid #30363d; border-radius: 8px;
|
||||
padding: 16px; margin: 4px 0 8px 0; font-size: 12px; display: none; }
|
||||
.trace-panel.open { display: block; }
|
||||
.trace-panel .section-title { color: #58a6ff; font-size: 12px; font-weight: 600; margin: 12px 0 6px; }
|
||||
.trace-panel .section-title:first-child { margin-top: 0; }
|
||||
.trace-panel .claim-list { list-style: none; padding: 0; margin: 0; }
|
||||
.trace-panel .claim-list li { padding: 4px 0; border-bottom: 1px solid #21262d; color: #c9d1d9; font-size: 12px; }
|
||||
.trace-panel .claim-list li:last-child { border-bottom: none; }
|
||||
.trace-panel .issues-box { background: #1c1017; border: 1px solid #f8514930; border-radius: 6px;
|
||||
padding: 8px 12px; margin: 4px 0; font-size: 12px; color: #f85149; }
|
||||
.trace-timeline { list-style: none; padding: 0; }
|
||||
.trace-timeline li { padding: 4px 0; border-left: 2px solid #30363d; padding-left: 12px; margin-left: 8px; }
|
||||
.trace-timeline li .ts { color: #484f58; font-size: 11px; }
|
||||
.trace-timeline li .ev { font-weight: 600; }
|
||||
.trace-timeline li.ev-approved .ev { color: #3fb950; }
|
||||
.trace-timeline li.ev-rejected .ev { color: #f85149; }
|
||||
.trace-timeline li.ev-changes .ev { color: #d29922; }
|
||||
.review-text { background: #161b22; padding: 8px 12px; border-radius: 4px;
|
||||
margin: 4px 0; white-space: pre-wrap; font-size: 11px; color: #8b949e; max-height: 200px; overflow-y: auto; }
|
||||
.eval-chain { background: #161b22; border-radius: 6px; padding: 8px 12px; margin: 4px 0 8px;
|
||||
font-size: 12px; display: flex; gap: 12px; flex-wrap: wrap; align-items: center; }
|
||||
.eval-chain .step { display: flex; align-items: center; gap: 4px; }
|
||||
.eval-chain .step-label { color: #8b949e; font-size: 11px; }
|
||||
.eval-chain .step-model { color: #c9d1d9; font-size: 11px; font-weight: 600; }
|
||||
.eval-chain .arrow { color: #484f58; }
|
||||
.pagination { display: flex; gap: 8px; align-items: center; justify-content: center; margin-top: 16px; }
|
||||
.pagination button { background: #161b22; color: #c9d1d9; border: 1px solid #30363d;
|
||||
border-radius: 4px; padding: 4px 12px; cursor: pointer; font-size: 12px; }
|
||||
.pagination button:hover { border-color: #58a6ff; }
|
||||
.pagination button:disabled { opacity: 0.4; cursor: default; }
|
||||
.pagination .page-info { color: #8b949e; font-size: 12px; }
|
||||
"""
|
||||
|
||||
|
||||
def render_prs_page(now: datetime) -> str:
|
||||
"""Render the PR lifecycle page. All data loaded client-side via /api/pr-lifecycle."""
|
||||
|
||||
body = """
|
||||
<!-- Hero cards (populated by JS) -->
|
||||
<div class="grid" id="hero-cards">
|
||||
<div class="card"><div class="label">Total PRs</div><div class="value blue" id="kpi-total">--</div><div class="detail" id="kpi-total-detail"></div></div>
|
||||
<div class="card"><div class="label">Merge Rate</div><div class="value green" id="kpi-merge-rate">--</div><div class="detail" id="kpi-merge-detail"></div></div>
|
||||
<div class="card"><div class="label">Median Eval Rounds</div><div class="value" id="kpi-rounds">--</div><div class="detail" id="kpi-rounds-detail"></div></div>
|
||||
<div class="card"><div class="label">Total Claims</div><div class="value blue" id="kpi-claims">--</div><div class="detail" id="kpi-claims-detail"></div></div>
|
||||
<div class="card"><div class="label">Est. Cost</div><div class="value" id="kpi-cost">--</div><div class="detail" id="kpi-cost-detail"></div></div>
|
||||
</div>
|
||||
|
||||
<!-- Filters -->
|
||||
<div class="filters">
|
||||
<select id="filter-domain"><option value="">All Domains</option></select>
|
||||
<select id="filter-outcome">
|
||||
<option value="">All Outcomes</option>
|
||||
<option value="merged">Merged</option>
|
||||
<option value="closed">Rejected</option>
|
||||
<option value="open">Open</option>
|
||||
</select>
|
||||
<select id="filter-tier">
|
||||
<option value="">All Tiers</option>
|
||||
<option value="DEEP">Deep</option>
|
||||
<option value="STANDARD">Standard</option>
|
||||
<option value="LIGHT">Light</option>
|
||||
</select>
|
||||
<select id="filter-days">
|
||||
<option value="7">Last 7 days</option>
|
||||
<option value="30" selected>Last 30 days</option>
|
||||
<option value="90">Last 90 days</option>
|
||||
<option value="0">All time</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<!-- PR table -->
|
||||
<div class="card" style="padding: 0; overflow: hidden;">
|
||||
<table class="pr-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th data-col="number">PR# <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="summary">Summary <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="claims_count">Claims <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="domain">Domain <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="status">Outcome <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="eval_rounds">Evals <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="evaluator">Evaluator <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="est_cost">Cost <span class="sort-arrow">▲</span></th>
|
||||
<th data-col="created_at">Date <span class="sort-arrow">▲</span></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="pr-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Pagination -->
|
||||
<div class="pagination">
|
||||
<button id="pg-prev" disabled>« Prev</button>
|
||||
<span class="page-info" id="pg-info">--</span>
|
||||
<button id="pg-next" disabled>Next »</button>
|
||||
</div>
|
||||
"""
|
||||
|
||||
# Use single-quoted JS strings throughout to avoid Python/HTML escaping issues
|
||||
scripts = """<script>
|
||||
// Number of PR rows shown per table page.
const PAGE_SIZE = 50;
// Base URL for pull-request links; the PR number is appended to it.
const FORGEJO = 'https://git.livingip.xyz/teleo/teleo-codex/pulls/';
// Every PR returned by the most recent /api/pr-lifecycle fetch.
let allData = [];
// allData after the domain/outcome/tier filters, in the current sort order.
let filtered = [];
// Data key currently sorted on (matches a header's data-col attribute).
let sortCol = 'number';
// Sort direction; the initial view is PR number descending.
let sortAsc = false;
// Zero-based index of the page currently displayed.
let page = 0;
// PR number (taken from the row's data-pr attribute) whose trace is open, or null.
let expandedPr = null;
|
||||
|
||||
function loadData() {
    // Fetch the PR list for the selected time window, then refresh the
    // domain filter, the KPI cards and the table.
    var days = document.getElementById('filter-days').value;
    // '0' is the "All time" option; the API is asked for a 9999-day window.
    var url = '/api/pr-lifecycle' + (days !== '0' ? '?days=' + days : '?days=9999');
    fetch(url).then(function(r) {
        // fetch() only rejects on network failure. Surface HTTP errors too,
        // so an error payload is not rendered as an empty result set.
        if (!r.ok) throw new Error('HTTP ' + r.status);
        return r.json();
    }).then(function(data) {
        allData = data.prs || [];
        populateFilters(allData);
        updateKPIs(data);
        applyFilters();
    }).catch(function() {
        document.getElementById('pr-tbody').innerHTML =
            '<tr><td colspan="9" style="text-align:center;color:#f85149;">Failed to load data</td></tr>';
    });
}
|
||||
|
||||
function populateFilters(prs) {
    // Rebuild the domain <select> from the distinct, non-empty domains in
    // `prs`, keeping the user's current choice when it is still available.
    var domains = [], seenD = {};
    prs.forEach(function(p) {
        if (p.domain && !seenD[p.domain]) { seenD[p.domain] = 1; domains.push(p.domain); }
    });
    domains.sort();

    var domSel = document.getElementById('filter-domain');
    var curDom = domSel.value;
    domSel.innerHTML = '<option value="">All Domains</option>' +
        domains.map(function(d) { return '<option value="' + esc(d) + '">' + esc(d) + '</option>'; }).join('');
    domSel.value = curDom;
    // If the previous domain is absent from the new data, no option matches
    // and the control would render blank. Fall back to "All Domains".
    if (domSel.selectedIndex === -1) domSel.value = '';
}
|
||||
|
||||
function updateKPIs(data) {
    // Fill the hero cards from the /api/pr-lifecycle response: totals,
    // merge rate, eval rounds, claim counts and cost.
    document.getElementById('kpi-total').textContent = fmtNum(data.total);
    document.getElementById('kpi-total-detail').textContent =
        fmtNum(data.merged) + ' merged, ' + fmtNum(data.closed) + ' rejected';

    // Merge rate is merged / (merged + closed). Guard the denominator:
    // when every PR is still open it is 0 and the division would give NaN.
    var mergedCount = data.merged || 0;
    var decided = mergedCount + (data.closed || 0);
    var rate = decided > 0 ? mergedCount / decided : 0;
    document.getElementById('kpi-merge-rate').textContent = fmtPct(rate);
    document.getElementById('kpi-merge-detail').textContent = fmtNum(data.open) + ' open';

    document.getElementById('kpi-rounds').textContent =
        data.median_rounds != null ? data.median_rounds.toFixed(1) : '--';
    document.getElementById('kpi-rounds-detail').textContent =
        data.max_rounds != null ? 'max: ' + data.max_rounds : '';

    var totalClaims = 0, mergedClaims = 0;
    var totalCost = 0;
    var actualCount = 0, estCount = 0;
    (data.prs || []).forEach(function(p) {
        // A PR with no claims_count is counted as one claim.
        totalClaims += (p.claims_count || 1);
        if (p.status === 'merged') mergedClaims += (p.claims_count || 1);
        totalCost += (p.cost || 0);
        if (p.cost_is_actual) actualCount++; else estCount++;
    });
    document.getElementById('kpi-claims').textContent = fmtNum(totalClaims);
    document.getElementById('kpi-claims-detail').textContent = fmtNum(mergedClaims) + ' merged';

    // Show actual DB total if available, otherwise sum from PRs
    var costLabel = '';
    if (data.actual_total_cost > 0) {
        document.getElementById('kpi-cost').textContent = '$' + data.actual_total_cost.toFixed(2);
        costLabel = 'from costs table';
    } else if (actualCount > 0) {
        document.getElementById('kpi-cost').textContent = '$' + totalCost.toFixed(2);
        costLabel = actualCount + ' actual, ' + estCount + ' est.';
    } else {
        document.getElementById('kpi-cost').textContent = '$' + totalCost.toFixed(2);
        costLabel = 'ALL ESTIMATED';
    }
    // NOTE(review): cost per claim always uses the per-PR sum, even when the
    // card shows actual_total_cost — confirm this is intended.
    var costPerClaim = totalClaims > 0 ? totalCost / totalClaims : 0;
    document.getElementById('kpi-cost-detail').textContent =
        '$' + costPerClaim.toFixed(3) + '/claim \u00b7 ' + costLabel;
}
|
||||
|
||||
function applyFilters() {
    // Narrow allData by the domain / outcome / tier selects (an empty value
    // means "no restriction"), then re-sort and show the first page.
    var wanted = {
        domain: document.getElementById('filter-domain').value,
        status: document.getElementById('filter-outcome').value,
        tier: document.getElementById('filter-tier').value
    };

    filtered = allData.filter(function(pr) {
        var domainOk = !wanted.domain || pr.domain === wanted.domain;
        var statusOk = !wanted.status || pr.status === wanted.status;
        var tierOk = !wanted.tier || pr.tier === wanted.tier;
        return domainOk && statusOk && tierOk;
    });

    sortData();
    page = 0;
    renderTable();
}
|
||||
|
||||
function sortData() {
    // Sort `filtered` in place by sortCol / sortAsc. Two numbers compare
    // numerically; anything else compares as lower-cased strings, with
    // null/undefined treated as the empty string.
    function keyOf(pr) {
        var value = pr[sortCol];
        return value == null ? '' : value;
    }

    filtered.sort(function(a, b) {
        var left = keyOf(a), right = keyOf(b);
        if (typeof left === 'number' && typeof right === 'number') {
            return sortAsc ? left - right : right - left;
        }
        var leftText = String(left).toLowerCase();
        var rightText = String(right).toLowerCase();
        return sortAsc ? leftText.localeCompare(rightText) : rightText.localeCompare(leftText);
    });
}
|
||||
|
||||
function truncate(s, n) {
    // Shorten `s` to at most `n` characters plus '...'; falsy input gives ''.
    if (!s) return '';
    if (s.length <= n) return s;
    return s.substring(0, n) + '...';
}
|
||||
|
||||
function shortModel(m) {
    // Map a full model identifier to a short display label. Entries are
    // checked in order and the first substring match wins; unknown models
    // fall back to the text after the last '/'.
    if (!m) return '';
    var known = [
        [['gemini-2.5-flash'], 'Gemini Flash'],
        [['claude-sonnet', 'sonnet-4'], 'Sonnet'],
        [['claude-opus', 'opus'], 'Opus'],
        [['haiku'], 'Haiku'],
        [['gpt-4o'], 'GPT-4o']
    ];
    for (var i = 0; i < known.length; i++) {
        var needles = known[i][0];
        for (var j = 0; j < needles.length; j++) {
            if (m.indexOf(needles[j]) !== -1) return known[i][1];
        }
    }
    // fallback: strip provider prefix
    return m.split('/').pop();
}
|
||||
|
||||
function renderTable() {
    // Render the current page of `filtered` into the table body, then
    // refresh the pagination controls and the header sort indicators.
    var tbody = document.getElementById('pr-tbody');
    var totalPages = Math.ceil(filtered.length / PAGE_SIZE);
    // Keep the page index inside the valid range.
    if (page >= totalPages) page = Math.max(totalPages - 1, 0);
    if (page < 0) page = 0;
    var start = page * PAGE_SIZE;
    var slice = filtered.slice(start, start + PAGE_SIZE);

    if (slice.length === 0) {
        tbody.innerHTML = '<tr><td colspan="9" style="text-align:center;color:#8b949e;">No PRs match filters</td></tr>';
    } else {
        var rows = [];
        slice.forEach(function(p) {
            var outClass = p.status === 'merged' ? 'outcome-merged' :
                           p.status === 'closed' ? 'outcome-closed' : 'outcome-open';
            var tierClass = (p.tier || '').toLowerCase() === 'deep' ? 'tier-deep' :
                            (p.tier || '').toLowerCase() === 'standard' ? 'tier-standard' : 'tier-light';
            // Values from the API are escaped before entering markup, like
            // every other field in the row.
            var num = esc(String(p.number));
            var date = p.created_at ? esc(String(p.created_at).substring(0, 10)) : '--';

            // Summary
            var summary = p.summary || '--';
            var reviewSnippet = '';
            if (p.status === 'closed' && p.review_snippet) {
                reviewSnippet = '<div class="review-snippet">' + esc(truncate(p.review_snippet, 120)) + '</div>';
            }

            // Outcome with tier badge
            var outcomeLabel = esc(p.status || '--');
            var tierBadge = p.tier ? ' <span class="' + tierClass + '" style="font-size:10px;">' + esc(p.tier) + '</span>' : '';

            // Evaluator column: domain agent + model
            var evaluator = '';
            if (p.domain_agent) {
                evaluator = '<div style="font-size:12px;color:#c9d1d9;">' + esc(p.domain_agent) + '</div>';
            }
            if (p.domain_model) {
                evaluator += '<div class="model-tag">' + esc(shortModel(p.domain_model)) + '</div>';
            }
            if (p.leo_model) {
                evaluator += '<div class="model-tag">' + esc(shortModel(p.leo_model)) + '</div>';
            }
            if (!evaluator) evaluator = '<span style="color:#484f58;">--</span>';

            // Cost — actual from DB or estimated (flagged)
            var costStr;
            if (p.cost != null && p.cost > 0) {
                if (p.cost_is_actual) {
                    costStr = '<span class="cost-val">$' + p.cost.toFixed(3) + '</span>';
                } else {
                    costStr = '<span class="cost-val" style="opacity:0.5;" title="Estimated — no actual cost tracked">~$' + p.cost.toFixed(3) + '</span>';
                }
            } else {
                costStr = '<span style="color:#484f58;">--</span>';
            }

            // Each PR produces two rows: the visible data row and a hidden
            // trace row that the row-click handler reveals on demand.
            rows.push(
                '<tr data-pr="' + num + '">' +
                '<td><span class="expand-chevron">▶</span> ' +
                '<a class="pr-link" href="' + FORGEJO + num + '" target="_blank" rel="noopener" onclick="event.stopPropagation();">#' + num + '</a></td>' +
                '<td style="white-space:normal;"><span class="summary-text">' + esc(summary) + '</span>' + reviewSnippet + '</td>' +
                '<td style="text-align:center;">' + esc(String(p.claims_count || '--')) + '</td>' +
                '<td>' + esc(p.domain || '--') + '</td>' +
                '<td class="' + outClass + '">' + outcomeLabel + tierBadge + '</td>' +
                '<td style="text-align:center;">' + esc(String(p.eval_rounds || '--')) + '</td>' +
                '<td>' + evaluator + '</td>' +
                '<td>' + costStr + '</td>' +
                '<td>' + date + '</td>' +
                '</tr>' +
                '<tr id="trace-' + num + '" style="display:none;"><td colspan="9" style="padding:0;">' +
                '<div class="trace-panel" id="panel-' + num + '">Loading trace...</div>' +
                '</td></tr>'
            );
        });
        tbody.innerHTML = rows.join('');
    }

    // Pagination — refreshed for empty results as well, so counts and
    // enabled buttons from a previous render never linger.
    document.getElementById('pg-info').textContent =
        'Page ' + (totalPages > 0 ? page + 1 : 0) + ' of ' + totalPages +
        ' (' + filtered.length + ' PRs)';
    document.getElementById('pg-prev').disabled = page <= 0;
    document.getElementById('pg-next').disabled = page >= totalPages - 1;

    // Update sort arrows
    document.querySelectorAll('.pr-table th').forEach(function(th) {
        th.classList.toggle('sorted', th.dataset.col === sortCol);
        var arrow = th.querySelector('.sort-arrow');
        if (arrow) arrow.innerHTML = (th.dataset.col === sortCol && sortAsc) ? '▲' : '▼';
    });
}
|
||||
|
||||
// Sort click: clicking the active column flips its direction; clicking a
// different column switches to it (ascending, except PR number which starts
// descending). The table is then re-sorted and re-rendered.
document.querySelectorAll('.pr-table th').forEach(function(th) {
    th.addEventListener('click', function() {
        var col = th.dataset.col;
        if (col === sortCol) { sortAsc = !sortAsc; }
        else { sortCol = col; sortAsc = col === 'number' ? false : true; }
        sortData();
        renderTable();
    });
});

// Row click -> trace expand. One delegated listener on the tbody covers
// every row, including rows created by later re-renders.
document.getElementById('pr-tbody').addEventListener('click', function(e) {
    // Clicks on links (the PR number) must follow the link, not toggle the row.
    if (e.target.closest('a')) return;
    // Only data rows carry data-pr; clicks inside a trace row are ignored.
    var row = e.target.closest('tr[data-pr]');
    if (!row) return;
    var pr = row.dataset.pr;
    var traceRow = document.getElementById('trace-' + pr);
    var panel = document.getElementById('panel-' + pr);
    if (!traceRow) return;

    if (traceRow.style.display === 'none') {
        // Opening: first collapse whichever other PR is currently expanded,
        // so at most one trace is visible at a time.
        if (expandedPr && expandedPr !== pr) {
            var prev = document.getElementById('trace-' + expandedPr);
            if (prev) prev.style.display = 'none';
            var prevRow = document.querySelector('tr[data-pr="' + expandedPr + '"]');
            if (prevRow) prevRow.classList.remove('expanded');
        }
        traceRow.style.display = '';
        panel.classList.add('open');
        row.classList.add('expanded');
        expandedPr = pr;
        // The trace is fetched every time the row is opened.
        loadTrace(pr, panel);
    } else {
        // Closing the row that is currently open.
        traceRow.style.display = 'none';
        panel.classList.remove('open');
        row.classList.remove('expanded');
        expandedPr = null;
    }
});
|
||||
|
||||
function loadTrace(pr, panel) {
    // Fetch /api/trace/<pr> and render claims, issues, eval chain, timeline
    // and reviews into `panel`. All API-provided text is HTML-escaped before
    // it is inserted into the markup.

    // Also find this PR in allData for claim list
    var prData = null;
    allData.forEach(function(p) { if (p.number == pr) prData = p; });

    // Truncate BEFORE escaping so the cut can never land inside an HTML
    // entity and leave broken markup behind.
    function clip(text) { return esc(String(text).substring(0, 2000)); }

    fetch('/api/trace/' + pr).then(function(r) {
        // fetch() does not reject on HTTP errors; route them to the catch.
        if (!r.ok) throw new Error('HTTP ' + r.status);
        return r.json();
    }).then(function(data) {
        var html = '';

        // --- Claims contained in this PR ---
        if (prData && prData.claim_titles && prData.claim_titles.length > 0) {
            html += '<div class="section-title">Claims (' + prData.claim_titles.length + ')</div>';
            html += '<ul class="claim-list">';
            prData.claim_titles.forEach(function(t) {
                html += '<li>' + esc(t) + '</li>';
            });
            html += '</ul>';
        }

        // --- Issues summary ---
        // Collect the distinct issue labels across all timeline events.
        var issues = [];
        if (data.timeline) {
            data.timeline.forEach(function(ev) {
                if (ev.detail && ev.detail.issues) {
                    var iss = ev.detail.issues;
                    // Issues may arrive as a JSON-encoded string; a plain
                    // string that is not JSON is kept as a single issue.
                    if (typeof iss === 'string') { try { iss = JSON.parse(iss); } catch(e) { iss = [iss]; } }
                    if (Array.isArray(iss)) {
                        iss.forEach(function(i) {
                            var label = String(i).replace(/_/g, ' ');
                            if (issues.indexOf(label) === -1) issues.push(label);
                        });
                    }
                }
            });
        }
        // The review snippet, when present, takes precedence over the labels.
        if (prData && prData.review_snippet) {
            html += '<div class="issues-box">' + esc(prData.review_snippet) + '</div>';
        } else if (issues.length > 0) {
            html += '<div class="issues-box">Issues: ' + issues.map(esc).join(', ') + '</div>';
        }

        // --- Eval chain (who reviewed with what model) ---
        var models = {};
        if (data.timeline) {
            data.timeline.forEach(function(ev) {
                if (ev.detail) {
                    if (ev.detail.model) models[ev.stage + '.' + ev.event] = ev.detail.model;
                    if (ev.detail.domain_model) models['domain_review'] = ev.detail.domain_model;
                    if (ev.detail.leo_model) models['leo_review'] = ev.detail.leo_model;
                }
            });
        }
        if (Object.keys(models).length > 0) {
            html += '<div class="eval-chain">';
            html += '<strong style="color:#58a6ff;">Eval chain:</strong> ';
            var parts = [];
            if (models['triage.haiku_triage'] || models['triage.deterministic_triage'])
                parts.push('<span class="step"><span class="step-label">Triage</span> <span class="step-model">' + esc(shortModel(models['triage.haiku_triage'] || 'deterministic')) + '</span></span>');
            if (models['domain_review'])
                parts.push('<span class="step"><span class="step-label">Domain</span> <span class="step-model">' + esc(shortModel(models['domain_review'])) + '</span></span>');
            if (models['leo_review'])
                parts.push('<span class="step"><span class="step-label">Leo</span> <span class="step-model">' + esc(shortModel(models['leo_review'])) + '</span></span>');
            html += parts.length > 0 ? parts.join(' <span class="arrow">→</span> ') : '<span style="color:#484f58;">No model data</span>';
            html += '</div>';
        }

        // --- Timeline ---
        if (data.timeline && data.timeline.length > 0) {
            html += '<div class="section-title">Timeline</div>';
            html += '<ul class="trace-timeline">';
            data.timeline.forEach(function(ev) {
                var cls = ev.event === 'approved' ? 'ev-approved' :
                          (ev.event === 'domain_rejected' || ev.event === 'tier05_rejected') ? 'ev-rejected' :
                          ev.event === 'changes_requested' ? 'ev-changes' : '';
                var ts = ev.timestamp ? String(ev.timestamp).substring(0, 19).replace('T', ' ') : '';
                var detail = '';
                if (ev.detail) {
                    if (ev.detail.tier) detail += ' tier=' + esc(String(ev.detail.tier));
                    if (ev.detail.reason) detail += ' — ' + esc(ev.detail.reason);
                    if (ev.detail.model) detail += ' [' + esc(shortModel(ev.detail.model)) + ']';
                    if (ev.detail.review_text) {
                        detail += '<div class="review-text">' + clip(ev.detail.review_text) + '</div>';
                    }
                    if (ev.detail.domain_review_text) {
                        detail += '<div class="review-text"><strong>Domain review:</strong><br>' + clip(ev.detail.domain_review_text) + '</div>';
                    }
                    if (ev.detail.leo_review_text) {
                        detail += '<div class="review-text"><strong>Leo review:</strong><br>' + clip(ev.detail.leo_review_text) + '</div>';
                    }
                }
                html += '<li class="' + cls + '">' +
                    '<span class="ts">' + esc(ts) + '</span> ' +
                    '<span class="ev">' + esc(ev.stage + '.' + ev.event) + '</span>' +
                    detail + '</li>';
            });
            html += '</ul>';
        } else {
            html += '<div style="color:#484f58;font-size:12px;margin-top:8px;">No timeline events</div>';
        }

        // --- Reviews ---
        if (data.reviews && data.reviews.length > 0) {
            html += '<div class="section-title">Reviews</div>';
            data.reviews.forEach(function(r) {
                var cls = r.outcome === 'approved' ? 'badge-green' :
                          r.outcome === 'rejected' ? 'badge-red' : 'badge-yellow';
                html += '<div style="margin:4px 0;">' +
                    '<span class="badge ' + cls + '">' + esc(r.outcome) + '</span> ' +
                    '<span style="color:#8b949e;font-size:11px;">' + esc(r.reviewer || '') + ' ' +
                    (r.model ? '[' + esc(shortModel(r.model)) + ']' : '') + ' ' +
                    esc(String(r.reviewed_at || '').substring(0, 19)) + '</span>';
                if (r.rejection_reason) {
                    html += ' <code>' + esc(r.rejection_reason) + '</code>';
                }
                if (r.notes) {
                    html += '<div class="review-text">' + esc(r.notes) + '</div>';
                }
                html += '</div>';
            });
        }

        panel.innerHTML = html || '<div style="color:#484f58;font-size:12px;">No trace data</div>';
    }).catch(function() {
        panel.innerHTML = '<div style="color:#f85149;font-size:12px;">Failed to load trace</div>';
    });
}
|
||||
|
||||
// Filter wiring: domain / outcome / tier only re-filter the rows already
// loaded; changing the time window needs a fresh fetch.
document.getElementById('filter-domain').addEventListener('change', applyFilters);
document.getElementById('filter-outcome').addEventListener('change', applyFilters);
document.getElementById('filter-tier').addEventListener('change', applyFilters);
document.getElementById('filter-days').addEventListener('change', loadData);

// Pagination buttons step one page back or forward and redraw.
document.getElementById('pg-prev').addEventListener('click', function() {
    page--;
    renderTable();
});
document.getElementById('pg-next').addEventListener('click', function() {
    page++;
    renderTable();
});

// Initial load
loadData();
|
||||
</script>"""
|
||||
|
||||
return render_page(
|
||||
title="PR Lifecycle",
|
||||
subtitle="Every PR through the pipeline — triage to merge",
|
||||
active_path="/prs",
|
||||
body_html=body,
|
||||
scripts=scripts,
|
||||
extra_css=EXTRA_CSS,
|
||||
timestamp=now.strftime("%Y-%m-%d %H:%M UTC"),
|
||||
)
|
||||
1349
diagnostics/dashboard_routes.py
Normal file
1349
diagnostics/dashboard_routes.py
Normal file
File diff suppressed because it is too large
Load diff
279
diagnostics/research_routes.py
Normal file
279
diagnostics/research_routes.py
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
"""Dashboard API routes for research session + cost tracking.
|
||||
|
||||
Argus-side read-only endpoints. These query the data that
|
||||
research_tracking.py writes to pipeline.db.
|
||||
|
||||
Add to app.py after alerting_routes setup.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from aiohttp import web
|
||||
|
||||
|
||||
def _conn(app):
    """Open pipeline.db read-only and return the connection.

    The database location is taken from ``app["db_path"]``.  Rows are
    returned as ``sqlite3.Row`` so handlers can index columns by name.
    The caller is responsible for closing the connection.
    """
    uri = "file:{}?mode=ro".format(app["db_path"])
    ro_conn = sqlite3.connect(uri, uri=True)
    ro_conn.row_factory = sqlite3.Row
    return ro_conn
|
||||
|
||||
|
||||
async def handle_api_research_sessions(request):
    """GET /api/research-sessions?agent=&domain=&days=7

    Returns research sessions with linked sources and cost data.

    Query params:
        agent  — optional exact match on research_sessions.agent
        domain — optional exact match on research_sessions.domain
        days   — lookback window in days (default 7; non-integer or
                 negative values fall back to 7)

    The JSON response has a "summary" object (totals and averages over the
    returned sessions) and a "sessions" list ordered newest first, each
    entry carrying its linked "sources".
    """
    agent = request.query.get("agent")
    domain = request.query.get("domain")
    try:
        days = int(request.query.get("days", 7))
    except (ValueError, TypeError):
        days = 7
    if days < 0:
        # "--3 days" is not a valid SQLite date modifier; datetime() would
        # yield NULL and the query would silently match nothing.
        days = 7

    conn = _conn(request.app)
    try:
        # Only fixed SQL fragments go into `where`; user values are bound.
        where = ["rs.started_at >= datetime('now', ?)"]
        params = [f"-{days} days"]

        if agent:
            where.append("rs.agent = ?")
            params.append(agent)
        if domain:
            where.append("rs.domain = ?")
            params.append(domain)

        where_clause = " AND ".join(where)

        # GROUP_CONCAT skips NULL values.  Every per-source column is
        # therefore COALESCEd (for rows that actually joined a source) so the
        # four concatenated lists keep one entry per source and stay aligned.
        sessions = conn.execute(f"""
            SELECT rs.*,
                GROUP_CONCAT(s.path, '||') as source_paths,
                GROUP_CONCAT(CASE WHEN s.path IS NOT NULL
                                  THEN COALESCE(s.status, '') END, '||') as source_statuses,
                GROUP_CONCAT(CASE WHEN s.path IS NOT NULL
                                  THEN COALESCE(s.claims_count, 0) END, '||') as source_claims,
                GROUP_CONCAT(CASE WHEN s.path IS NOT NULL
                                  THEN COALESCE(s.cost_usd, 0) END, '||') as source_costs
            FROM research_sessions rs
            LEFT JOIN sources s ON s.session_id = rs.id
            WHERE {where_clause}
            GROUP BY rs.id
            ORDER BY rs.started_at DESC
        """, params).fetchall()

        result = []
        for s in sessions:
            sources = []
            if s["source_paths"]:
                paths = s["source_paths"].split("||")
                statuses = (s["source_statuses"] or "").split("||")
                claims = (s["source_claims"] or "").split("||")
                costs = (s["source_costs"] or "").split("||")
                for i, p in enumerate(paths):
                    sources.append({
                        "path": p,
                        # '' is the COALESCE placeholder for a NULL status
                        "status": (statuses[i] or None) if i < len(statuses) else None,
                        "claims_count": int(claims[i]) if i < len(claims) and claims[i] else 0,
                        "extraction_cost": float(costs[i]) if i < len(costs) and costs[i] else 0,
                    })

            extraction_cost = sum(src["extraction_cost"] for src in sources)
            result.append({
                "id": s["id"],
                "agent": s["agent"],
                "domain": s["domain"],
                "topic": s["topic"],
                "reasoning": s["reasoning"],
                "summary": s["summary"],
                "sources_planned": s["sources_planned"],
                "sources_produced": s["sources_produced"],
                "model": s["model"],
                "input_tokens": s["input_tokens"],
                "output_tokens": s["output_tokens"],
                "research_cost": s["cost_usd"],
                "extraction_cost": extraction_cost,
                # cost_usd is nullable; treat NULL as 0 instead of raising
                "total_cost": (s["cost_usd"] or 0) + extraction_cost,
                "total_claims": sum(src["claims_count"] for src in sources),
                "status": s["status"],
                "started_at": s["started_at"],
                "completed_at": s["completed_at"],
                "sources": sources,
            })

        # Summary stats
        total_sessions = len(result)
        total_cost = sum(r["total_cost"] for r in result)
        total_claims = sum(r["total_claims"] for r in result)
        # sources_produced is nullable as well
        total_sources = sum(r["sources_produced"] or 0 for r in result)

        return web.json_response({
            "summary": {
                "sessions": total_sessions,
                "total_cost": round(total_cost, 2),
                "total_claims": total_claims,
                "total_sources": total_sources,
                "avg_cost_per_claim": round(total_cost / total_claims, 4) if total_claims else 0,
                "avg_cost_per_session": round(total_cost / total_sessions, 4) if total_sessions else 0,
            },
            "sessions": result,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_api_costs(request):
    """GET /api/costs?days=14&by=stage|model|date

    Comprehensive cost breakdown. Works with EXISTING data in costs table
    plus the new extraction costs once backfilled.

    Query params:
        days — lookback window in days for the costs table (default 14;
               non-integer or negative values fall back to 14)
        by   — grouping column: "stage", "model" or "date" (anything else
               falls back to "stage")

    The response contains "period_days", "grand_total", the grouped rows
    under "by_<group>", and a per-agent extraction breakdown under
    "by_agent".  Note that "by_agent" is computed over all sources with a
    positive cost; it is not restricted to the `days` window.
    """
    try:
        days = int(request.query.get("days", 14))
    except (ValueError, TypeError):
        days = 14
    if days < 0:
        # "--3 days" is not a valid SQLite date modifier
        days = 14

    # group_by is interpolated into SQL below, so it must stay whitelisted.
    group_by = request.query.get("by", "stage")
    if group_by not in {"stage", "model", "date"}:
        group_by = "stage"

    conn = _conn(request.app)
    try:
        rows = conn.execute(f"""
            SELECT {group_by},
                SUM(calls) as total_calls,
                SUM(input_tokens) as total_input,
                SUM(output_tokens) as total_output,
                SUM(cost_usd) as total_cost
            FROM costs
            WHERE date >= date('now', ?)
            GROUP BY {group_by}
            ORDER BY total_cost DESC
        """, (f"-{days} days",)).fetchall()

        result = [
            {
                group_by: r[group_by],
                "calls": r["total_calls"],
                "input_tokens": r["total_input"],
                "output_tokens": r["total_output"],
                # SUM() is NULL when every cost_usd in the group is NULL
                "cost_usd": round(r["total_cost"] or 0, 4),
            }
            for r in rows
        ]

        grand_total = sum(r["cost_usd"] for r in result)

        # Also get per-agent cost from sources table (extraction costs).
        # Join against DISTINCT (source_path, agent) pairs: joining prs
        # directly repeats a source row once per PR and inflates the SUMs.
        agent_costs = conn.execute("""
            SELECT p.agent,
                COUNT(DISTINCT s.path) as sources,
                SUM(s.cost_usd) as extraction_cost,
                SUM(s.claims_count) as claims
            FROM sources s
            LEFT JOIN (SELECT DISTINCT source_path, agent FROM prs) p
                   ON p.source_path = s.path
            WHERE s.cost_usd > 0
            GROUP BY p.agent
            ORDER BY extraction_cost DESC
        """).fetchall()

        agent_breakdown = [
            {
                "agent": r["agent"] or "unlinked",
                "sources": r["sources"],
                "extraction_cost": round(r["extraction_cost"], 2),
                "claims": r["claims"],
                "cost_per_claim": round(r["extraction_cost"] / r["claims"], 4) if r["claims"] else 0,
            }
            for r in agent_costs
        ]

        return web.json_response({
            "period_days": days,
            "grand_total": round(grand_total, 2),
            "by_" + group_by: result,
            "by_agent": agent_breakdown,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_api_source_detail(request):
    """GET /api/source/{path}

    Full lifecycle of a single source: research session → extraction → claims → eval outcomes.

    Looks the source up by exact path first, then by path suffix.  Returns
    404 JSON when nothing matches.  On success the response is the sources
    row plus "research_session" (row or null), "prs" (list of PR rows for
    that source) and "eval_history" (evaluate-stage audit_log events for
    those PRs, oldest first).
    """
    source_path = request.match_info["path"]

    conn = _conn(request.app)
    try:
        # Try exact match first, fall back to suffix match (anchored)
        source = conn.execute(
            "SELECT * FROM sources WHERE path = ?",
            (source_path,),
        ).fetchone()
        if not source:
            # Suffix match — anchor with / prefix to avoid substring hits.
            # The path comes straight from the URL, so escape LIKE
            # metacharacters; otherwise "%" or "_" in the request would act
            # as wildcards and match unrelated sources.
            like_safe = (
                source_path.replace("\\", "\\\\")
                .replace("%", "\\%")
                .replace("_", "\\_")
            )
            source = conn.execute(
                "SELECT * FROM sources WHERE path LIKE ? ESCAPE '\\' "
                "ORDER BY length(path) LIMIT 1",
                (f"%/{like_safe}",),
            ).fetchone()

        if not source:
            return web.json_response({"error": "Source not found"}, status=404)

        result = dict(source)

        # Get research session if linked
        if source["session_id"]:
            session = conn.execute(
                "SELECT * FROM research_sessions WHERE id = ?",
                (source["session_id"],),
            ).fetchone()
            result["research_session"] = dict(session) if session else None
        else:
            result["research_session"] = None

        # Get PRs from this source
        prs = conn.execute(
            "SELECT number, status, domain, agent, tier, leo_verdict, domain_verdict, "
            "cost_usd, created_at, merged_at, commit_type, transient_retries, substantive_retries, last_error "
            "FROM prs WHERE source_path = ?",
            (source["path"],),
        ).fetchall()
        result["prs"] = [dict(p) for p in prs]

        # Get eval events from audit_log for those PRs
        # NOTE: audit_log.detail is mixed — some rows are JSON (evaluate events),
        # some are plain text. Use json_valid() to filter safely.
        pr_numbers = [p["number"] for p in prs]
        if pr_numbers:
            # Only "?" placeholders are interpolated; the numbers are bound.
            placeholders = ",".join("?" * len(pr_numbers))
            evals = conn.execute(f"""
                SELECT * FROM audit_log
                WHERE stage = 'evaluate'
                AND json_valid(detail)
                AND json_extract(detail, '$.pr') IN ({placeholders})
                ORDER BY timestamp
            """, pr_numbers).fetchall()
            result["eval_history"] = [
                {"timestamp": e["timestamp"], "event": e["event"],
                 "detail": json.loads(e["detail"]) if e["detail"] else None}
                for e in evals
            ]
        else:
            result["eval_history"] = []

        return web.json_response(result)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def setup_research_routes(app):
    """Attach the research-tracking GET endpoints to ``app.router``.

    Intended to be called from create_app().
    """
    get_routes = (
        ("/api/research-sessions", handle_api_research_sessions),
        ("/api/costs", handle_api_costs),
        ("/api/source/{path:.+}", handle_api_source_detail),
    )
    for url_pattern, handler in get_routes:
        app.router.add_get(url_pattern, handler)
|
||||
|
||||
|
||||
# Exact-match paths from this module to register as public in the auth middleware.
RESEARCH_PUBLIC_PATHS = frozenset((
    "/api/research-sessions",
    "/api/costs",
))
# /api/source/{path} is not listed because it needs prefix matching — handle
# it in the auth middleware itself:
#   if path.startswith("/api/source/"): allow
|
||||
419
diagnostics/research_tracking.py
Normal file
419
diagnostics/research_tracking.py
Normal file
|
|
@ -0,0 +1,419 @@
|
|||
"""Research session tracking + cost attribution for the Teleo pipeline.
|
||||
|
||||
This module adds three capabilities:
|
||||
1. research_sessions table — tracks WHY agents researched, what they found interesting,
|
||||
session cost, and links to generated sources
|
||||
2. Extraction cost attribution — writes per-source cost to sources.cost_usd after extraction
|
||||
3. Source → claim linkage — ensures prs.source_path is always populated
|
||||
|
||||
Designed for Epimetheus to integrate into the pipeline. Argus built the spec;
|
||||
Ganymede reviews; Epimetheus wires it in.
|
||||
|
||||
Data flow:
|
||||
Agent research session → research_sessions row (with reasoning + summary)
|
||||
→ sources created (with session_id FK)
|
||||
→ extraction runs (cost written to sources.cost_usd + costs table)
|
||||
→ PRs created (source_path populated)
|
||||
→ claims merged (traceable back to session)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger("research_tracking")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Migration v11: research_sessions table + sources.session_id FK
|
||||
# (v9 is current; v10 is Epimetheus's eval pipeline migration)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# SQL script for schema migration v11.  It contains several statements, so it
# must be run with a multi-statement API such as sqlite3's executescript().
# NOTE(review): the ALTER TABLE below fails if sources.session_id already
# exists, so the script as a whole is not re-runnable.
MIGRATION_V11_SQL = """
-- Research session tracking table
CREATE TABLE IF NOT EXISTS research_sessions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    agent TEXT NOT NULL,
    -- Which agent ran the research (leo, rio, astra, etc.)
    domain TEXT,
    -- Primary domain of the research
    topic TEXT NOT NULL,
    -- What they researched (short description)
    reasoning TEXT,
    -- WHY they chose this topic (agent's own explanation)
    summary TEXT,
    -- What they found most interesting/relevant
    sources_planned INTEGER DEFAULT 0,
    -- How many sources they intended to produce
    sources_produced INTEGER DEFAULT 0,
    -- How many actually materialized
    model TEXT,
    -- Model used for research (e.g. claude-opus-4-6)
    input_tokens INTEGER DEFAULT 0,
    output_tokens INTEGER DEFAULT 0,
    cost_usd REAL DEFAULT 0,
    -- Total research session cost (LLM calls for discovery + writing)
    status TEXT DEFAULT 'running',
    -- running, completed, failed, partial
    started_at TEXT DEFAULT (datetime('now')),
    completed_at TEXT,
    metadata TEXT DEFAULT '{}'
    -- JSON: any extra context (prompt version, search queries used, etc.)
);

CREATE INDEX IF NOT EXISTS idx_rs_agent ON research_sessions(agent);
CREATE INDEX IF NOT EXISTS idx_rs_domain ON research_sessions(domain);
CREATE INDEX IF NOT EXISTS idx_rs_started ON research_sessions(started_at);

-- Add session_id FK to sources table
ALTER TABLE sources ADD COLUMN session_id INTEGER REFERENCES research_sessions(id);
CREATE INDEX IF NOT EXISTS idx_sources_session ON sources(session_id);

-- Record migration
INSERT INTO schema_version (version) VALUES (11);
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cost attribution: write extraction cost to sources.cost_usd
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# USD price per one million tokens, keyed by model identifier (as of March 2026).
MODEL_PRICING = {
    "anthropic/claude-sonnet-4.5": {"input": 3.00, "output": 15.00},
    "anthropic/claude-sonnet-4-5": {"input": 3.00, "output": 15.00},
    "anthropic/claude-haiku-4.5": {"input": 0.80, "output": 4.00},
    "anthropic/claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00},
    "minimax/minimax-m2.5": {"input": 0.14, "output": 0.56},
}


def calculate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Return the USD cost of a call given its model and token counts.

    Rates are looked up in MODEL_PRICING (USD per million tokens).  A model
    that is not listed is priced at the Sonnet 4.5 rates and a warning is
    logged.
    """
    try:
        rates = MODEL_PRICING[model]
    except KeyError:
        # Unlisted model: fall back to Sonnet 4.5 rates as a conservative estimate.
        logger.warning("Unknown model %s — using Sonnet 4.5 pricing", model)
        rates = {"input": 3.00, "output": 15.00}
    per_million_total = input_tokens * rates["input"] + output_tokens * rates["output"]
    return per_million_total / 1_000_000
|
||||
|
||||
|
||||
def record_extraction_cost(
    conn: sqlite3.Connection,
    source_path: str,
    model: str,
    input_tokens: int,
    output_tokens: int,
):
    """Write extraction cost to both sources.cost_usd and costs table.

    Call this after each successful extraction call in openrouter-extract-v2.py.
    This is the missing link — the CSV logger records tokens but never writes
    cost back to the DB.

    Both writes happen in one transaction: they are committed together, or
    rolled back together if either statement raises.

    Args:
        conn: writable connection to pipeline.db.
        source_path: value of sources.path for the extracted source.
        model: model identifier, used for pricing and stored in
            sources.extraction_model.
        input_tokens: prompt tokens consumed by the call.
        output_tokens: completion tokens produced by the call.

    Returns:
        The cost in USD that was recorded.
    """
    # The file-level import only brings in `datetime`; utcnow() is deprecated.
    from datetime import timezone

    cost = calculate_cost(model, input_tokens, output_tokens)
    date = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    with conn:
        # Update source row.  COALESCE: NULL + x is NULL in SQL, which would
        # silently discard the cost for rows whose cost_usd was never set.
        updated = conn.execute(
            "UPDATE sources SET cost_usd = COALESCE(cost_usd, 0) + ?, extraction_model = ? WHERE path = ?",
            (cost, model, source_path),
        ).rowcount

        # Also record in costs table for dashboard aggregation
        conn.execute(
            """INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd)
               VALUES (?, ?, 'extraction', 1, ?, ?, ?)
               ON CONFLICT(date, model, stage)
               DO UPDATE SET calls = calls + 1,
                             input_tokens = input_tokens + excluded.input_tokens,
                             output_tokens = output_tokens + excluded.output_tokens,
                             cost_usd = cost_usd + excluded.cost_usd""",
            (date, model, input_tokens, output_tokens, cost),
        )

    if not updated:
        # The costs table was still updated; only per-source attribution is missing.
        logger.warning(
            "No sources row matched %s — extraction cost recorded in costs table only",
            source_path,
        )
    logger.info(
        "Recorded extraction cost for %s: $%.4f (%d in, %d out, %s)",
        source_path, cost, input_tokens, output_tokens, model,
    )
    return cost
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Research session lifecycle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def start_session(
    conn: sqlite3.Connection,
    agent: str,
    topic: str,
    domain: Optional[str] = None,
    reasoning: Optional[str] = None,
    sources_planned: int = 0,
    model: Optional[str] = None,
    metadata: Optional[dict] = None,
) -> int:
    """Open a research session row and return its id.

    Call this at the START of a research session, before any sources are
    produced, stating what the agent plans to research and why.  The row is
    inserted into research_sessions and committed immediately; ``metadata``
    is stored as a JSON string (an empty object when omitted).
    """
    metadata_json = json.dumps(metadata or {})
    new_row = (agent, domain, topic, reasoning, sources_planned, model, metadata_json)
    cursor = conn.execute(
        """INSERT INTO research_sessions
               (agent, domain, topic, reasoning, sources_planned, model, metadata)
           VALUES (?, ?, ?, ?, ?, ?, ?)""",
        new_row,
    )
    conn.commit()
    new_id = cursor.lastrowid
    logger.info("Started research session #%d: %s / %s", new_id, agent, topic)
    return new_id
|
||||
|
||||
|
||||
def link_source_to_session(
    conn: sqlite3.Connection,
    source_path: str,
    session_id: int,
):
    """Link a source file to its research session.

    Call this when a source is written to inbox/ during a research session.

    The session's sources_produced counter is incremented only when a
    sources row was actually (re)linked by this call.  Calling again for a
    source already linked to the same session, or for a path with no
    sources row, leaves the counter unchanged, so the counter cannot drift
    away from the number of linked rows.  Both updates run in a single
    transaction.

    Returns:
        True when a sources row was linked by this call, False otherwise.
    """
    with conn:
        # IS NOT is NULL-safe: matches rows that are unlinked or that point
        # at a different session, but not rows already linked to this one.
        linked = conn.execute(
            "UPDATE sources SET session_id = ? WHERE path = ? AND session_id IS NOT ?",
            (session_id, source_path, session_id),
        ).rowcount > 0
        if linked:
            conn.execute(
                """UPDATE research_sessions
                   SET sources_produced = sources_produced + 1
                   WHERE id = ?""",
                (session_id,),
            )
    return linked
|
||||
|
||||
|
||||
def complete_session(
    conn: sqlite3.Connection,
    session_id: int,
    summary: str,
    input_tokens: int = 0,
    output_tokens: int = 0,
    cost_usd: float = 0,
    status: str = "completed",
):
    """Close out a research session row.

    Call this at the END of a research session.  ``summary`` should capture
    what the agent found most interesting or relevant, and the cost should
    cover ALL LLM calls made during the session (web search, analysis,
    source writing — everything).  The row's completed_at is set to the
    database's current time and the change is committed.
    """
    final_values = {
        "summary": summary,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost_usd": cost_usd,
        "status": status,
        "session_id": session_id,
    }
    conn.execute(
        """UPDATE research_sessions
           SET summary = :summary,
               input_tokens = :input_tokens,
               output_tokens = :output_tokens,
               cost_usd = :cost_usd,
               status = :status,
               completed_at = datetime('now')
           WHERE id = :session_id""",
        final_values,
    )
    conn.commit()
    logger.info("Completed research session #%d: %s", session_id, status)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Source → PR linkage fix
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def ensure_source_path_on_pr(
    conn: sqlite3.Connection,
    pr_number: int,
    source_path: str,
):
    """Fill in prs.source_path for a PR when it is still unset.

    Call during PR creation.  A source_path that is already non-empty is
    left untouched; only NULL or '' values are overwritten.  The change is
    committed before returning.

    Currently 0/1451 PRs have source_path set. This is the fix.
    """
    fill_if_unset = (
        "UPDATE prs SET source_path = :path "
        "WHERE number = :number AND (source_path IS NULL OR source_path = '')"
    )
    conn.execute(fill_if_unset, {"path": source_path, "number": pr_number})
    conn.commit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backfill: attribute extraction costs from existing CSV log
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def backfill_extraction_costs(conn: sqlite3.Connection, csv_path: str):
    """One-time backfill: read openrouter-usage.csv and write costs to sources + costs tables.

    Run once to fill in the ~$338 of extraction costs that were logged to CSV
    but never written to the database.

    Re-run safety: the sources table is only touched where cost_usd is still
    0/NULL, so per-source attribution is not double-counted.  The costs
    table, however, is incremented for EVERY priced CSV row on every run —
    re-running after a successful run double-counts the costs table.  The
    whole backfill runs in one transaction, so a run that raises part-way
    is rolled back and can simply be repeated.

    Args:
        conn: writable connection to pipeline.db.
        csv_path: path to the usage CSV; rows are read by the column names
            source_file, model, input_tokens, output_tokens and date.

    Returns:
        (count, total_cost): number of CSV rows recorded and their summed
        cost in USD.
    """
    import csv

    count = 0
    total_cost = 0.0
    # newline="" is what the csv module expects for files it parses itself.
    with open(csv_path, newline="") as f, conn:
        for row in csv.DictReader(f):
            # DictReader yields None for fields missing from a short row.
            source_file = row.get("source_file") or ""
            model = row.get("model") or ""
            try:
                in_tok = int(row.get("input_tokens", 0) or 0)
                out_tok = int(row.get("output_tokens", 0) or 0)
            except (ValueError, TypeError):
                continue

            cost = calculate_cost(model, in_tok, out_tok)
            if cost <= 0:
                continue

            # Try to match source_file to sources.path
            # CSV has filename, DB has full path — match on exact suffix.
            # LIKE metacharacters are escaped so that e.g. "_" in a filename
            # matches literally instead of "any character".
            if source_file:
                like_safe = (
                    source_file.replace("\\", "\\\\")
                    .replace("%", "\\%")
                    .replace("_", "\\_")
                )
                pattern = f"%/{like_safe}" if "/" not in source_file else f"%{like_safe}"
                # ORDER BY length(path): prefer the shortest matching path.
                # COALESCE so rows whose cost_usd is NULL are eligible too.
                matched = conn.execute(
                    "SELECT path FROM sources WHERE path LIKE ? ESCAPE '\\' "
                    "AND COALESCE(cost_usd, 0) = 0 ORDER BY length(path) LIMIT 1",
                    (pattern,),
                ).fetchone()

                if matched:
                    conn.execute(
                        "UPDATE sources SET cost_usd = ?, extraction_model = ? WHERE path = ?",
                        (cost, model, matched[0]),
                    )

            # Always record in costs table
            date = row.get("date") or "unknown"
            conn.execute(
                """INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd)
                   VALUES (?, ?, 'extraction', 1, ?, ?, ?)
                   ON CONFLICT(date, model, stage)
                   DO UPDATE SET calls = calls + 1,
                                 input_tokens = input_tokens + excluded.input_tokens,
                                 output_tokens = output_tokens + excluded.output_tokens,
                                 cost_usd = cost_usd + excluded.cost_usd""",
                (date, model, in_tok, out_tok, cost),
            )

            count += 1
            total_cost += cost

    logger.info("Backfilled %d extraction cost records, total $%.2f", count, total_cost)
    return count, total_cost
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backfill: populate prs.source_path from branch naming convention
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def backfill_source_paths(conn: sqlite3.Connection):
    """One-time backfill: derive source_path for existing PRs from branch names.

    Branch format: extract/YYYY-MM-DD-source-name or similar patterns.
    Source path format: inbox/queue/YYYY-MM-DD-source-name.md

    Only PRs whose source_path is unset (NULL or '') are considered, the
    same definition of "unset" that ensure_source_path_on_pr uses.  The part
    of the branch name after the first "/" is matched as the beginning of a
    file name in sources.path; when several sources match, the shortest
    path wins.

    Returns:
        Number of PRs whose source_path was filled in.
    """
    rows = conn.execute(
        "SELECT number, branch FROM prs "
        "WHERE (source_path IS NULL OR source_path = '') AND branch IS NOT NULL"
    ).fetchall()

    count = 0
    for number, branch in rows:
        # Try to extract source name from branch
        # Common patterns: extract/source-name, claims/source-name
        _, slash, source_stem = branch.partition("/")
        if not slash or not source_stem:
            continue

        # Escape LIKE metacharacters so "_" / "%" in a branch name match
        # literally.  The trailing % lets the stem match regardless of the
        # file extension; shortest path wins.
        like_safe = (
            source_stem.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        matched = conn.execute(
            "SELECT path FROM sources WHERE path LIKE ? ESCAPE '\\' "
            "ORDER BY length(path) LIMIT 1",
            (f"%/{like_safe}%",),
        ).fetchone()

        if matched:
            conn.execute(
                "UPDATE prs SET source_path = ? WHERE number = ?",
                (matched[0], number),
            )
            count += 1

    conn.commit()
    logger.info("Backfilled source_path for %d PRs", count)
    return count
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration points (for Epimetheus to wire in)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Human-readable wiring notes kept as a string constant; nothing in the
# visible part of this module reads it.
INTEGRATION_GUIDE = """
## Where to wire this in

### 1. openrouter-extract-v2.py — after successful extraction call

    from research_tracking import record_extraction_cost

    # After line 430 (content, usage = call_openrouter(...))
    # After line 672 (log_usage(...))
    record_extraction_cost(
        conn, args.source_file, args.model,
        usage.get("prompt_tokens", 0),
        usage.get("completion_tokens", 0),
    )

### 2. Agent research scripts — wrap research sessions

    from research_tracking import start_session, link_source_to_session, complete_session

    # At start of research:
    session_id = start_session(conn, agent="leo", topic="weapons stigmatization campaigns",
        domain="grand-strategy",
        reasoning="Following up on EU AI Act national security exclusion — exploring how stigmatization
                   campaigns have historically driven arms control policy",
        sources_planned=6, model="claude-opus-4-6")

    # As each source is written:
    link_source_to_session(conn, source_path, session_id)

    # At end of research:
    complete_session(conn, session_id,
        summary="Ottawa Treaty mine ban model is the strongest parallel to AI weapons — same
                 3-condition framework (humanitarian harm + low military utility + civil society
                 coalition). Ukraine Shahed case is a near-miss triggering event.",
        input_tokens=total_in, output_tokens=total_out, cost_usd=total_cost)

### 3. PR creation in lib/merge.py or lib/validate.py — ensure source_path

    from research_tracking import ensure_source_path_on_pr

    # When creating a PR, pass the source:
    ensure_source_path_on_pr(conn, pr_number, source_path)

### 4. One-time backfills (run manually after migration)

    from research_tracking import backfill_extraction_costs, backfill_source_paths

    backfill_extraction_costs(conn, "/opt/teleo-eval/logs/openrouter-usage.csv")
    backfill_source_paths(conn)

### 5. Migration

Run MIGRATION_V11_SQL against pipeline.db after backing up.
"""
|
||||
475
diagnostics/response_audit_routes.py
Normal file
475
diagnostics/response_audit_routes.py
Normal file
|
|
@ -0,0 +1,475 @@
|
|||
"""Response audit API routes — agent cost tracking, reasoning traces, unified activity.
|
||||
|
||||
Endpoints:
|
||||
GET /api/response-audit — paginated response list with cost columns
|
||||
GET /api/response-audit/{id} — single response detail with full tool_calls
|
||||
GET /api/agent-costs — aggregated cost view from response_audit
|
||||
GET /api/unified-activity — merged prs + response_audit timeline
|
||||
|
||||
Data source: response_audit table in pipeline.db (written by Epimetheus's Telegram bot).
|
||||
|
||||
Owner: Argus
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
logger = logging.getLogger("argus.response_audit_routes")
|
||||
|
||||
|
||||
def _conn(app):
    """Open pipeline.db read-only and return the connection.

    The database location is taken from ``app["db_path"]``.  Rows are
    returned as ``sqlite3.Row`` so handlers can index columns by name.
    The caller is responsible for closing the connection.
    """
    uri = "file:{}?mode=ro".format(app["db_path"])
    ro_conn = sqlite3.connect(uri, uri=True)
    ro_conn.row_factory = sqlite3.Row
    return ro_conn
|
||||
|
||||
|
||||
# ─── GET /api/response-audit ─────────────────────────────────────────────
|
||||
|
||||
async def handle_response_audit_list(request):
    """Paginated response audit list with cost and model data.

    Query params:
        agent  — filter by agent name
        hours  — lookback window (default 24, clamped to 0..168)
        limit  — max results (default 50, clamped to 0..200)
        offset — pagination offset (default 0)
        model  — filter by model name (substring match)

    Non-integer values fall back to the defaults.  The response echoes the
    effective "limit", "offset" and "hours", reports "total" (all matching
    rows, ignoring pagination) and lists the page under "responses",
    newest first.
    """
    agent = request.query.get("agent")
    model_filter = request.query.get("model")
    try:
        # Lower bound: "--5 hours" is not a valid SQLite date modifier.
        hours = max(0, min(int(request.query.get("hours", 24)), 168))
    except (ValueError, TypeError):
        hours = 24
    try:
        # Lower bound: SQLite treats a negative LIMIT as "no limit", which
        # would let ?limit=-1 bypass the 200-row cap.
        limit = max(0, min(int(request.query.get("limit", 50)), 200))
    except (ValueError, TypeError):
        limit = 50
    try:
        offset = max(int(request.query.get("offset", 0)), 0)
    except (ValueError, TypeError):
        offset = 0

    conn = _conn(request.app)
    try:
        # Only fixed SQL fragments go into `where`; user values are bound.
        where = ["timestamp > datetime('now', ?)"]
        params: list = [f"-{hours} hours"]

        if agent:
            where.append("agent = ?")
            params.append(agent)
        if model_filter:
            where.append("model LIKE ?")
            params.append(f"%{model_filter}%")

        where_clause = " AND ".join(where)

        # Count total matching
        total = conn.execute(
            f"SELECT COUNT(*) as cnt FROM response_audit WHERE {where_clause}",
            params,
        ).fetchone()["cnt"]

        # Fetch page — exclude large text fields for list view.
        # json_valid() guards json_array_length(): a single row with
        # malformed tool_calls would otherwise make the whole query fail.
        # NULL and '[]' both yield 0, as before.
        rows = conn.execute(
            f"""SELECT id, timestamp, agent, model, query,
                       prompt_tokens, completion_tokens,
                       generation_cost, embedding_cost, total_cost,
                       confidence_score, response_time_ms, query_type,
                       CASE WHEN json_valid(tool_calls)
                            THEN json_array_length(tool_calls)
                            ELSE 0 END as tool_call_count,
                       LENGTH(display_response) as response_length
                FROM response_audit
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ? OFFSET ?""",
            params + [limit, offset],
        ).fetchall()

        responses = [
            {
                "id": r["id"],
                "timestamp": r["timestamp"],
                "agent": r["agent"],
                "model": r["model"],
                "query": r["query"],
                "query_type": r["query_type"],
                "prompt_tokens": r["prompt_tokens"],
                "completion_tokens": r["completion_tokens"],
                "generation_cost": r["generation_cost"],
                "embedding_cost": r["embedding_cost"],
                "total_cost": r["total_cost"],
                "confidence": r["confidence_score"],
                "response_time_ms": r["response_time_ms"],
                "tool_call_count": r["tool_call_count"],
                "response_length": r["response_length"],
            }
            for r in rows
        ]

        return web.json_response({
            "total": total,
            "limit": limit,
            "offset": offset,
            "hours": hours,
            "responses": responses,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── GET /api/response-audit/{id} ────────────────────────────────────────
|
||||
|
||||
async def handle_response_audit_detail(request):
    """Return one response_audit row in full, plus a summary of its trace.

    Path params:
        id — integer key of the response_audit row

    Columns that store JSON (conversation_window, entities_matched,
    claims_matched, retrieval_layers_hit, market_data, tool_calls) are decoded
    when they hold valid JSON and passed through unchanged otherwise.

    Responses:
        200 — the row as a JSON object with an added "trace_summary" key
              (None when tool_calls did not decode to a list)
        400 — the id is not an integer
        404 — no row has that id
    """
    try:
        audit_id = int(request.match_info["id"])
    except (ValueError, TypeError):
        return web.json_response({"error": "Invalid ID"}, status=400)

    conn = _conn(request.app)
    try:
        row = conn.execute(
            """SELECT id, timestamp, chat_id, user, agent, model,
                      query, query_type, conversation_window,
                      entities_matched, claims_matched,
                      retrieval_layers_hit, retrieval_gap,
                      market_data, research_context,
                      tool_calls, raw_response, display_response,
                      confidence_score, response_time_ms,
                      prompt_tokens, completion_tokens,
                      generation_cost, embedding_cost, total_cost,
                      blocked, block_reason
               FROM response_audit WHERE id = ?""",
            (audit_id,),
        ).fetchone()

        if not row:
            return web.json_response({"error": "Response not found"}, status=404)

        def parse_json(val):
            """Decode a JSON column; fall back to the raw value if it is not JSON."""
            if val is None:
                return None
            try:
                return json.loads(val)
            except (json.JSONDecodeError, TypeError):
                return val

        def step_duration(step):
            """Return a step's duration_ms, or 0 when it is missing or not numeric."""
            value = step.get("duration_ms")
            return value if isinstance(value, (int, float)) else 0

        result = {
            "id": row["id"],
            "timestamp": row["timestamp"],
            "chat_id": row["chat_id"],
            "user": row["user"],
            "agent": row["agent"],
            "model": row["model"],
            "query": row["query"],
            "query_type": row["query_type"],
            "conversation_window": parse_json(row["conversation_window"]),
            "entities_matched": parse_json(row["entities_matched"]),
            "claims_matched": parse_json(row["claims_matched"]),
            "retrieval_layers_hit": parse_json(row["retrieval_layers_hit"]),
            "retrieval_gap": row["retrieval_gap"],
            "market_data": parse_json(row["market_data"]),
            "research_context": row["research_context"],
            "tool_calls": parse_json(row["tool_calls"]),
            "display_response": row["display_response"],
            "raw_response": row["raw_response"],
            "confidence_score": row["confidence_score"],
            "response_time_ms": row["response_time_ms"],
            "prompt_tokens": row["prompt_tokens"],
            "completion_tokens": row["completion_tokens"],
            "generation_cost": row["generation_cost"],
            "embedding_cost": row["embedding_cost"],
            "total_cost": row["total_cost"],
            # Keep NULL distinguishable from an explicit 0/1 flag.
            "blocked": bool(row["blocked"]) if row["blocked"] is not None else None,
            "block_reason": row["block_reason"],
        }

        # Compute iteration summary from tool_calls
        tool_calls = result["tool_calls"] or []
        if isinstance(tool_calls, list):
            # Entries that are not dicts still count toward total_steps but
            # cannot be classified as reasoning or tool steps.
            typed_steps = [t for t in tool_calls if isinstance(t, dict)]
            tool_steps = [t for t in typed_steps if t.get("type") == "tool_call"]
            reasoning_count = sum(1 for t in typed_steps if t.get("type") == "reasoning")
            result["trace_summary"] = {
                "total_steps": len(tool_calls),
                "reasoning_steps": reasoning_count,
                "tool_steps": len(tool_steps),
                # Sorted so the response is stable between calls; str() keeps
                # odd (null / non-string) tool values from breaking the set.
                "tools_used": sorted({str(t.get("tool") or "unknown") for t in tool_steps}),
                # A stored null duration must not turn the request into a 500.
                "total_duration_ms": sum(step_duration(t) for t in tool_steps),
            }
        else:
            result["trace_summary"] = None

        return web.json_response(result)
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── GET /api/agent-costs ─────────────────────────────────────────────────
|
||||
|
||||
async def handle_agent_costs(request):
    """Aggregated agent cost data from response_audit.

    Query params:
        days  — lookback window in days (default 7, clamped to 1..30)
        by    — grouping: agent, model, day (default agent; any other value
                falls back to agent)
        agent — optional exact-match filter on the agent column

    Returns a JSON object with the window size, grand totals, the grouped
    breakdown under the key "by_<grouping>", and a per-day trend that is
    included regardless of the grouping.
    """
    try:
        days = int(request.query.get("days", 7))
    except (ValueError, TypeError):
        days = 7
    # Zero or negative values would produce an empty or invalid
    # datetime('now', ...) modifier, so clamp at both ends.
    days = max(1, min(days, 30))

    # Only these fixed expressions are ever interpolated into the SQL text;
    # the user-supplied value just selects one of them.
    group_columns = {"agent": "agent", "model": "model", "day": "date(timestamp)"}
    group_by = request.query.get("by", "agent")
    if group_by not in group_columns:
        group_by = "agent"
    group_col = group_columns[group_by]

    agent = request.query.get("agent")

    conn = _conn(request.app)
    try:
        # The same time/agent filter applies to both queries below.
        where = ["timestamp > datetime('now', ?)"]
        params: list = [f"-{days} days"]
        if agent:
            where.append("agent = ?")
            params.append(agent)
        where_clause = " AND ".join(where)

        rows = conn.execute(
            f"""SELECT {group_col} as grp,
                       COUNT(*) as responses,
                       SUM(prompt_tokens) as total_prompt_tokens,
                       SUM(completion_tokens) as total_completion_tokens,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as total_cost,
                       AVG(COALESCE(total_cost, generation_cost, 0)) as avg_cost,
                       AVG(response_time_ms) as avg_response_ms,
                       AVG(confidence_score) as avg_confidence
                FROM response_audit
                WHERE {where_clause}
                GROUP BY grp
                ORDER BY total_cost DESC""",
            params,
        ).fetchall()

        breakdown = []
        for r in rows:
            avg_confidence = r["avg_confidence"]
            breakdown.append({
                group_by: r["grp"],
                "responses": r["responses"],
                "prompt_tokens": r["total_prompt_tokens"] or 0,
                "completion_tokens": r["total_completion_tokens"] or 0,
                "total_cost": round(r["total_cost"] or 0, 4),
                "avg_cost_per_response": round(r["avg_cost"] or 0, 4),
                "avg_response_ms": round(r["avg_response_ms"] or 0, 0),
                # None means "no confidence recorded"; a real average of 0.0
                # must be reported as 0.0, not hidden.
                "avg_confidence": round(avg_confidence, 3) if avg_confidence is not None else None,
            })

        grand_total = sum(b["total_cost"] for b in breakdown)
        total_responses = sum(b["responses"] for b in breakdown)

        # Daily trend (always included regardless of grouping)
        daily = conn.execute(
            f"""SELECT date(timestamp) as day,
                       COUNT(*) as responses,
                       SUM(COALESCE(total_cost, generation_cost, 0)) as cost
                FROM response_audit
                WHERE {where_clause}
                GROUP BY day ORDER BY day""",
            params,
        ).fetchall()

        daily_trend = [
            {
                "date": r["day"],
                "responses": r["responses"],
                "cost": round(r["cost"] or 0, 4),
            }
            for r in daily
        ]

        return web.json_response({
            "period_days": days,
            "grand_total": round(grand_total, 4),
            "total_responses": total_responses,
            "avg_cost_per_response": round(grand_total / total_responses, 4) if total_responses else 0,
            f"by_{group_by}": breakdown,
            "daily_trend": daily_trend,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── GET /api/unified-activity ────────────────────────────────────────────
|
||||
|
||||
async def handle_unified_activity(request):
    """Unified activity feed merging pipeline ops (prs) + agent responses (response_audit).

    Query params:
        hours — lookback window (default 24, clamped to 1..168)
        limit — max results (default 100, clamped to 1..500)
        agent — filter by agent name
        type  — filter: pipeline, response, or all (default all)

    Returns a JSON object with the window size, per-type counts, summed cost,
    and the entries sorted newest first. Counts and cost describe only the
    entries that remain after `limit` is applied.
    """
    try:
        hours = int(request.query.get("hours", 24))
    except (ValueError, TypeError):
        hours = 24
    hours = max(1, min(hours, 168))

    try:
        limit = int(request.query.get("limit", 100))
    except (ValueError, TypeError):
        limit = 100
    # A negative limit would make entries[:limit] drop rows from the end
    # instead of capping the feed.
    limit = max(1, min(limit, 500))

    agent = request.query.get("agent")
    activity_type = request.query.get("type", "all")

    conn = _conn(request.app)
    try:
        entries = []

        # Pipeline events from prs table
        if activity_type in ("all", "pipeline"):
            pr_where = ["COALESCE(merged_at, created_at) > datetime('now', ?)"]
            pr_params: list = [f"-{hours} hours"]
            if agent:
                pr_where.append("agent = ?")
                pr_params.append(agent)

            prs = conn.execute(
                f"""SELECT number, branch, status, domain, agent, tier,
                           commit_type, cost_usd,
                           created_at, merged_at,
                           leo_verdict, domain_verdict
                    FROM prs
                    WHERE {' AND '.join(pr_where)}
                    ORDER BY COALESCE(merged_at, created_at) DESC""",
                pr_params,
            ).fetchall()

            for pr in prs:
                ts = pr["merged_at"] or pr["created_at"]
                kind = pr["commit_type"] or "PR"
                status = pr["status"]

                # Derive action description from status
                if status == "merged":
                    action = f"Merged {kind}"
                elif status == "closed":
                    action = f"Closed {kind}"
                elif status in ("approved", "reviewing"):
                    action = f"{kind} awaiting merge"
                else:
                    action = f"{kind} {status}"

                entries.append({
                    "timestamp": ts,
                    "type": "pipeline",
                    "agent": pr["agent"],
                    "action": action,
                    "domain": pr["domain"],
                    "pr_number": pr["number"],
                    "branch": pr["branch"],
                    "status": status,
                    "commit_type": pr["commit_type"],
                    "cost": pr["cost_usd"],
                    "detail": {
                        "tier": pr["tier"],
                        "leo_verdict": pr["leo_verdict"],
                        "domain_verdict": pr["domain_verdict"],
                    },
                })

        # Agent responses from response_audit
        if activity_type in ("all", "response"):
            ra_where = ["timestamp > datetime('now', ?)"]
            ra_params: list = [f"-{hours} hours"]
            if agent:
                ra_where.append("agent = ?")
                ra_params.append(agent)

            # json_array_length() raises on malformed JSON, which would fail
            # the whole feed for one bad row; json_valid() guards it so such
            # rows simply report 0 tool calls.
            responses = conn.execute(
                f"""SELECT id, timestamp, agent, model, query,
                           generation_cost, response_time_ms,
                           confidence_score,
                           CASE WHEN tool_calls IS NOT NULL AND json_valid(tool_calls)
                                THEN json_array_length(tool_calls)
                                ELSE 0 END as tool_call_count
                    FROM response_audit
                    WHERE {' AND '.join(ra_where)}
                    ORDER BY timestamp DESC""",
                ra_params,
            ).fetchall()

            for r in responses:
                # Truncate query for feed display
                query_text = r["query"] or ""
                query_preview = query_text[:120]
                if len(query_text) > 120:
                    query_preview += "..."

                entries.append({
                    "timestamp": r["timestamp"],
                    "type": "response",
                    "agent": r["agent"],
                    "action": f"Responded to query ({r['tool_call_count']} tool calls)",
                    "domain": None,
                    "pr_number": None,
                    "audit_id": r["id"],
                    "query_preview": query_preview,
                    "model": r["model"],
                    "cost": r["generation_cost"],
                    "detail": {
                        "response_time_ms": r["response_time_ms"],
                        "confidence": r["confidence_score"],
                        "tool_call_count": r["tool_call_count"],
                    },
                })

        # Sort combined entries by timestamp descending; rows without a
        # timestamp sort last.
        entries.sort(key=lambda e: e["timestamp"] or "", reverse=True)
        entries = entries[:limit]

        # Summary stats
        pipeline_count = sum(1 for e in entries if e["type"] == "pipeline")
        response_count = sum(1 for e in entries if e["type"] == "response")
        total_cost = sum(e.get("cost") or 0 for e in entries)

        return web.json_response({
            "hours": hours,
            "total_entries": len(entries),
            "pipeline_events": pipeline_count,
            "response_events": response_count,
            "total_cost": round(total_cost, 4),
            "entries": entries,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── Registration ─────────────────────────────────────────────────────────
|
||||
|
||||
def register_response_audit_routes(app):
    """Attach the response-audit GET endpoints to *app*; call from create_app()."""
    # Registration order is kept: list, detail, costs, unified feed.
    endpoints = (
        ("/api/response-audit", handle_response_audit_list),
        ("/api/response-audit/{id}", handle_response_audit_detail),
        ("/api/agent-costs", handle_agent_costs),
        ("/api/unified-activity", handle_unified_activity),
    )
    for route_path, route_handler in endpoints:
        app.router.add_get(route_path, route_handler)
|
||||
|
||||
|
||||
# Exact-match paths the auth middleware should treat as public.
# The detail route /api/response-audit/{id} is not listed here: it carries a
# variable suffix, so the middleware needs prefix matching to cover it.
RESPONSE_AUDIT_PUBLIC_PATHS = frozenset(
    (
        "/api/response-audit",
        "/api/agent-costs",
        "/api/unified-activity",
    )
)
|
||||
222
diagnostics/review_queue.py
Normal file
222
diagnostics/review_queue.py
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
"""Review queue: fetches open PRs from Forgejo, classifies and enriches them.
|
||||
|
||||
Data sources:
|
||||
- Forgejo API (git.livingip.xyz) for PR metadata, reviews, changed files
|
||||
- pipeline.db prs table for eval status cross-reference
|
||||
|
||||
Display priority: broken > needs-review (by age) > approved-awaiting-merge > changes-requested
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import aiohttp
|
||||
|
||||
logger = logging.getLogger("argus.review_queue")
|
||||
|
||||
FORGEJO_BASE = "https://git.livingip.xyz/api/v1"
|
||||
REPO = "teleo/teleo-codex"
|
||||
|
||||
# Domain detection from branch prefixes or path patterns
|
||||
DOMAIN_KEYWORDS = {
|
||||
"internet-finance": ["internet-finance", "defi", "dao", "prediction-market"],
|
||||
"entertainment": ["entertainment", "clay", "media", "ip-"],
|
||||
"ai-alignment": ["ai-alignment", "alignment", "theseus"],
|
||||
"health": ["health", "vida", "biotech", "glp"],
|
||||
"space-development": ["space", "astra", "orbital", "lunar"],
|
||||
"energy": ["energy", "solar", "nuclear", "fusion"],
|
||||
"grand-strategy": ["grand-strategy", "leo", "strategy"],
|
||||
"collective-intelligence": ["collective-intelligence", "coordination"],
|
||||
"critical-systems": ["critical-systems", "complexity", "emergence"],
|
||||
"teleological-economics": ["teleological-economics", "disruption", "attractor"],
|
||||
"cultural-dynamics": ["cultural-dynamics", "memetics", "narrative"],
|
||||
"mechanisms": ["mechanisms", "futarchy", "governance"],
|
||||
"living-capital": ["living-capital", "investment"],
|
||||
"living-agents": ["living-agents", "agent-architecture"],
|
||||
"teleohumanity": ["teleohumanity", "worldview"],
|
||||
"general": ["general"],
|
||||
}
|
||||
|
||||
|
||||
def _detect_domain(branch: str, title: str, files: list[dict]) -> str:
|
||||
"""Detect domain from branch name, title, or changed file paths."""
|
||||
text = f"{branch} {title}".lower()
|
||||
|
||||
# Check branch/title
|
||||
for domain, keywords in DOMAIN_KEYWORDS.items():
|
||||
for kw in keywords:
|
||||
if kw in text:
|
||||
return domain
|
||||
|
||||
# Check file paths
|
||||
for f in files:
|
||||
path = f.get("filename", "")
|
||||
if path.startswith("domains/") or path.startswith("foundations/") or path.startswith("core/"):
|
||||
parts = path.split("/")
|
||||
if len(parts) >= 2:
|
||||
return parts[1]
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _classify_files(files: list[dict]) -> dict[str, int]:
|
||||
"""Count claim, enrichment, and challenge files from changed files list."""
|
||||
counts = {"claim_count": 0, "enrichment_count": 0, "challenge_count": 0}
|
||||
for f in files:
|
||||
path = f.get("filename", "")
|
||||
status = f.get("status", "") # added, modified, removed
|
||||
|
||||
if not path.startswith("domains/") and not path.startswith("foundations/") and not path.startswith("core/"):
|
||||
continue
|
||||
|
||||
name = path.split("/")[-1].lower()
|
||||
|
||||
if "challenge" in name or "divergence" in name:
|
||||
counts["challenge_count"] += 1
|
||||
elif status == "modified":
|
||||
counts["enrichment_count"] += 1
|
||||
else:
|
||||
counts["claim_count"] += 1
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
def _classify_status(
|
||||
changed_files: int,
|
||||
reviews: list[dict],
|
||||
requested_reviewers: list[dict],
|
||||
) -> str:
|
||||
"""Classify PR status: broken, needs-review, approved-awaiting-merge, changes-requested."""
|
||||
if changed_files == 0:
|
||||
return "broken"
|
||||
|
||||
has_changes_requested = any(r["state"] == "REQUEST_CHANGES" for r in reviews)
|
||||
if has_changes_requested:
|
||||
# Check if there's a newer approval after the changes request
|
||||
last_change_req = max(
|
||||
(r["submitted_at"] for r in reviews if r["state"] == "REQUEST_CHANGES"),
|
||||
default="",
|
||||
)
|
||||
later_approvals = [
|
||||
r for r in reviews
|
||||
if r["state"] == "APPROVED" and r["submitted_at"] > last_change_req
|
||||
]
|
||||
if not later_approvals:
|
||||
return "changes-requested"
|
||||
|
||||
approvals = [r for r in reviews if r["state"] == "APPROVED"]
|
||||
if len(approvals) >= 2:
|
||||
return "approved-awaiting-merge"
|
||||
|
||||
return "needs-review"
|
||||
|
||||
|
||||
def _days_open(created_at: str) -> int:
|
||||
"""Calculate days since PR was opened."""
|
||||
created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
|
||||
now = datetime.now(timezone.utc)
|
||||
return (now - created).days
|
||||
|
||||
|
||||
# Sort rank per status; lower values are shown first. Statuses missing from
# this table sort after all known ones.
_STATUS_PRIORITY = {
    "broken": 0,
    "needs-review": 1,
    "approved-awaiting-merge": 2,
    "changes-requested": 3,
}


async def fetch_review_queue(
    forgejo_token: str | None = None,
    timeout_s: int = 15,
) -> list[dict[str, Any]]:
    """Fetch open PRs from Forgejo and return the enriched review queue.

    Args:
        forgejo_token: optional API token, sent as "Authorization: token ...".
        timeout_s: total timeout in seconds applied to each HTTP request.

    Returns:
        One dict per open PR (number, title, author, domain, branch, age,
        status, file counts, reviews, URL), sorted by status priority and then
        by days open, oldest first. Returns [] when the PR list itself cannot
        be fetched. A failed reviews/files sub-request is logged and treated
        as an empty list for that PR.

    NOTE: only the first page of the PR list is requested (limit=50), so
    open PRs beyond that are not included.
    """
    headers = {"Accept": "application/json"}
    if forgejo_token:
        headers["Authorization"] = f"token {forgejo_token}"

    request_timeout = aiohttp.ClientTimeout(total=timeout_s)
    connector = aiohttp.TCPConnector()  # Default SSL verification — Forgejo token must not be exposed to MITM
    async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
        # Fetch open PRs
        url = f"{FORGEJO_BASE}/repos/{REPO}/pulls?state=open&limit=50&sort=oldest"
        try:
            async with session.get(url, timeout=request_timeout) as resp:
                if resp.status != 200:
                    logger.error("Forgejo PR list returned %d", resp.status)
                    return []
                prs = await resp.json()
        except Exception as e:
            logger.error("Failed to fetch PRs from Forgejo: %s", e)
            return []

        # Fetch reviews and files for all PRs in parallel
        async def _fetch_json(session, url, label=""):
            """GET one JSON document; return [] (and log) on any failure."""
            try:
                async with session.get(url, timeout=request_timeout) as resp:
                    if resp.status == 200:
                        return await resp.json()
                    logger.warning("Forgejo returned %d for %s", resp.status, label)
            except Exception as e:
                logger.warning("Failed to fetch %s: %s", label, e)
            return []

        sub_tasks = []
        for pr in prs:
            n = pr["number"]
            sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/reviews", f"reviews PR#{n}"))
            sub_tasks.append(_fetch_json(session, f"{FORGEJO_BASE}/repos/{REPO}/pulls/{n}/files", f"files PR#{n}"))

        # Results come back in submission order: reviews at 2*i, files at 2*i + 1.
        sub_results = await asyncio.gather(*sub_tasks)

        queue = []
        for i, pr in enumerate(prs):
            reviews = sub_results[i * 2]
            files = sub_results[i * 2 + 1]

            # Build enriched PR record. Nested objects and strings are read
            # with "or" fallbacks because the API may send explicit nulls
            # (e.g. a review without a user), which dict.get defaults do not
            # cover.
            branch = (pr.get("head") or {}).get("ref", "")
            title = pr.get("title") or ""
            author = (pr.get("user") or {}).get("login", "unknown")
            created_at = pr.get("created_at") or ""
            changed_files = pr.get("changed_files", len(files))
            requested_reviewers = pr.get("requested_reviewers") or []

            domain = _detect_domain(branch, title, files)
            file_counts = _classify_files(files)
            status = _classify_status(changed_files, reviews, requested_reviewers)
            days = _days_open(created_at) if created_at else 0

            review_list = [
                {
                    "reviewer": (r.get("user") or {}).get("login", "unknown"),
                    "outcome": r.get("state", "PENDING").lower(),
                    "date": r.get("submitted_at") or "",
                    "summary": (r.get("body") or "")[:200],
                }
                for r in reviews
                if r.get("state") and r["state"] != "PENDING"
            ]

            queue.append({
                "pr_number": pr["number"],
                "title": title,
                "author": author,
                "domain": domain,
                "branch": branch,
                "created_at": created_at,
                "days_open": days,
                "status": status,
                "changed_files": changed_files,
                **file_counts,
                "reviews": review_list,
                "url": pr.get("html_url", ""),
            })

        # Sort: broken first, then needs-review by days_open desc, then rest
        queue.sort(key=lambda x: (_STATUS_PRIORITY.get(x["status"], 99), -x["days_open"]))

        return queue
|
||||
64
diagnostics/review_queue_routes.py
Normal file
64
diagnostics/review_queue_routes.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""Route handlers for /api/review-queue endpoint.
|
||||
|
||||
Import into app.py and register routes in create_app().
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from aiohttp import web
|
||||
from review_queue import fetch_review_queue
|
||||
|
||||
logger = logging.getLogger("argus.review_queue")
|
||||
|
||||
|
||||
async def handle_review_queue(request):
    """GET /api/review-queue — PR review pipeline view.

    Query params (all optional, exact match, combined with AND):
        status: broken, needs-review, approved-awaiting-merge, changes-requested
        author: agent/author name
        domain: domain name

    Responds with the filtered queue in the order produced by
    fetch_review_queue, its length, and a per-status tally of the filtered
    items. A failure while fetching yields a 500 with the error text.
    """
    token = request.app.get("_forgejo_token")

    try:
        items = await fetch_review_queue(forgejo_token=token)
    except Exception as e:
        logger.error("Review queue fetch failed: %s", e)
        return web.json_response({"error": str(e)}, status=500)

    # Apply filters: each query parameter names the item field it matches.
    for field in ("status", "author", "domain"):
        wanted = request.query.get(field)
        if wanted:
            items = [entry for entry in items if entry[field] == wanted]

    # Summary stats
    tally = {}
    for entry in items:
        key = entry["status"]
        tally[key] = tally.get(key, 0) + 1

    payload = {
        "queue": items,
        "total": len(items),
        "status_counts": tally,
    }
    return web.json_response(payload)
|
||||
|
||||
|
||||
def register_review_queue_routes(app, forgejo_token=None):
    """Wire the review-queue endpoint into *app*.

    forgejo_token: optional Forgejo API token; stored on the app under
    "_forgejo_token", where the handler reads it for authenticated requests.
    """
    app["_forgejo_token"] = forgejo_token
    router = app.router
    router.add_get("/api/review-queue", handle_review_queue)
|
||||
150
diagnostics/shared_ui.py
Normal file
150
diagnostics/shared_ui.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""Shared UI components for the 4-page Argus dashboard.
|
||||
|
||||
Provides: nav bar, CSS, page skeleton, Chart.js imports, shared JS helpers.
|
||||
All pages import render_page() and pass their body HTML + page-specific scripts.
|
||||
"""
|
||||
|
||||
# Page definitions — used by nav bar
|
||||
PAGES = [
|
||||
{"path": "/prs", "label": "PRs", "icon": "✎"},
|
||||
{"path": "/ops", "label": "Operations", "icon": "⚙"},
|
||||
{"path": "/health", "label": "Knowledge Health", "icon": "♥"},
|
||||
{"path": "/agents", "label": "Agents", "icon": "★"},
|
||||
{"path": "/epistemic", "label": "Epistemic", "icon": "⚖"},
|
||||
{"path": "/portfolio", "label": "Portfolio", "icon": "★"},
|
||||
]
|
||||
|
||||
|
||||
def _nav_html(active_path: str) -> str:
|
||||
"""Render the shared navigation bar."""
|
||||
links = []
|
||||
for p in PAGES:
|
||||
cls = "nav-active" if p["path"] == active_path else ""
|
||||
links.append(
|
||||
f'<a href="{p["path"]}" class="nav-link {cls}">'
|
||||
f'{p["icon"]} {p["label"]}</a>'
|
||||
)
|
||||
return f"""<nav class="top-nav">
|
||||
<div class="nav-brand">Argus</div>
|
||||
<div class="nav-links">{"".join(links)}</div>
|
||||
<div class="nav-aux">
|
||||
<a href="/audit" class="nav-link">Audit</a>
|
||||
<a href="/api/metrics" class="nav-link">API</a>
|
||||
</div>
|
||||
</nav>"""
|
||||
|
||||
|
||||
SHARED_CSS = """
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: -apple-system, system-ui, 'Segoe UI', sans-serif; background: #0d1117; color: #c9d1d9; }
|
||||
.top-nav { display: flex; align-items: center; gap: 16px; padding: 12px 24px;
|
||||
background: #161b22; border-bottom: 1px solid #30363d; position: sticky; top: 0; z-index: 100; }
|
||||
.nav-brand { color: #58a6ff; font-weight: 700; font-size: 18px; }
|
||||
.nav-links { display: flex; gap: 4px; flex: 1; }
|
||||
.nav-aux { display: flex; gap: 4px; }
|
||||
.nav-link { color: #8b949e; text-decoration: none; padding: 6px 12px; border-radius: 6px;
|
||||
font-size: 13px; transition: all 0.15s; white-space: nowrap; }
|
||||
.nav-link:hover { color: #c9d1d9; background: #21262d; }
|
||||
.nav-active { color: #58a6ff !important; background: #0d1117; font-weight: 600; }
|
||||
.page-content { padding: 24px; max-width: 1400px; margin: 0 auto; }
|
||||
.page-header { margin-bottom: 20px; }
|
||||
.page-header h1 { color: #58a6ff; font-size: 22px; }
|
||||
.page-header .subtitle { color: #8b949e; font-size: 13px; margin-top: 4px; }
|
||||
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 12px; margin: 16px 0; }
|
||||
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; }
|
||||
.card .label { color: #8b949e; font-size: 11px; text-transform: uppercase; letter-spacing: 0.5px; }
|
||||
.card .value { font-size: 28px; font-weight: 700; margin-top: 2px; }
|
||||
.card .detail { color: #8b949e; font-size: 11px; margin-top: 2px; }
|
||||
.green { color: #3fb950; }
|
||||
.yellow { color: #d29922; }
|
||||
.red { color: #f85149; }
|
||||
.blue { color: #58a6ff; }
|
||||
.purple { color: #bc8cff; }
|
||||
.chart-container { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 16px 0; }
|
||||
.chart-container h2 { color: #c9d1d9; font-size: 14px; margin-bottom: 12px; }
|
||||
canvas { max-height: 260px; }
|
||||
.row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
|
||||
@media (max-width: 800px) { .row { grid-template-columns: 1fr; } }
|
||||
table { width: 100%; border-collapse: collapse; font-size: 13px; }
|
||||
th { color: #8b949e; font-size: 11px; text-transform: uppercase; text-align: left; padding: 6px 10px; border-bottom: 1px solid #30363d; }
|
||||
td { padding: 6px 10px; border-bottom: 1px solid #21262d; }
|
||||
code { background: #21262d; padding: 2px 6px; border-radius: 3px; font-size: 12px; }
|
||||
.section { margin-top: 28px; }
|
||||
.section-title { color: #58a6ff; font-size: 15px; font-weight: 600; margin-bottom: 12px; padding-bottom: 6px; border-bottom: 1px solid #21262d; }
|
||||
.funnel { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; }
|
||||
.funnel-step { text-align: center; flex: 1; min-width: 100px; }
|
||||
.funnel-step .num { font-size: 24px; font-weight: 700; }
|
||||
.funnel-step .lbl { font-size: 11px; color: #8b949e; text-transform: uppercase; }
|
||||
.funnel-arrow { color: #30363d; font-size: 20px; }
|
||||
.footer { margin-top: 40px; padding: 16px 24px; border-top: 1px solid #21262d; color: #484f58; font-size: 11px; text-align: center; }
|
||||
.footer a { color: #484f58; text-decoration: none; }
|
||||
.footer a:hover { color: #8b949e; }
|
||||
.alert-banner { padding: 8px 16px; font-size: 12px; border-radius: 6px; margin-bottom: 12px; }
|
||||
.alert-critical { background: #f8514922; border: 1px solid #f85149; color: #f85149; }
|
||||
.alert-warning { background: #d2992222; border: 1px solid #d29922; color: #d29922; }
|
||||
.alert-info { background: #58a6ff22; border: 1px solid #58a6ff; color: #58a6ff; }
|
||||
.badge { display: inline-block; padding: 2px 8px; border-radius: 4px; font-size: 11px; font-weight: 600; }
|
||||
.badge-green { background: #23863633; color: #3fb950; }
|
||||
.badge-yellow { background: #d2992233; color: #d29922; }
|
||||
.badge-red { background: #f8514933; color: #f85149; }
|
||||
.badge-blue { background: #1f6feb33; color: #58a6ff; }
|
||||
"""
|
||||
|
||||
|
||||
CHART_JS_IMPORTS = """<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.6"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-adapter-date-fns@3.0.0"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation@3.1.0"></script>"""
|
||||
|
||||
|
||||
SHARED_JS = """
|
||||
const AGENT_COLORS = {
|
||||
'rio': '#58a6ff', 'clay': '#3fb950', 'astra': '#bc8cff',
|
||||
'leo': '#d29922', 'vida': '#f0883e', 'theseus': '#f85149',
|
||||
'epimetheus': '#79c0ff', 'ganymede': '#8b949e', 'oberon': '#ec4899',
|
||||
};
|
||||
function agentColor(name) {
|
||||
return AGENT_COLORS[name?.toLowerCase()] ||
|
||||
'#' + ((name||'').split('').reduce((a,c) => (a*31+c.charCodeAt(0))&0xFFFFFF, 0x556677)).toString(16).padStart(6,'0');
|
||||
}
|
||||
Chart.defaults.color = '#8b949e';
|
||||
Chart.defaults.borderColor = '#21262d';
|
||||
Chart.defaults.font.family = '-apple-system, system-ui, sans-serif';
|
||||
Chart.defaults.font.size = 11;
|
||||
|
||||
function esc(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; }
|
||||
function fmtPct(v) { return v != null ? (v * 100).toFixed(1) + '%' : '--'; }
|
||||
function fmtNum(v) { return v != null ? v.toLocaleString() : '--'; }
|
||||
function fmtDollars(v) { return v != null ? '$' + v.toFixed(2) : '--'; }
|
||||
"""
|
||||
|
||||
|
||||
def render_page(title: str, subtitle: str, active_path: str, body_html: str,
                scripts: str = "", extra_css: str = "", timestamp: str = "") -> str:
    """Assemble a full HTML document: head, nav bar, header, body and footer.

    Args:
        title: used in the <title> tag and the page heading.
        subtitle: shown under the heading.
        active_path: path of the current page, forwarded to the nav bar.
        body_html: page-specific markup placed inside the content area.
        scripts: page-specific markup emitted after the shared script block.
        extra_css: CSS appended after the shared stylesheet.
        timestamp: optional text appended to the subtitle line.

    All arguments are interpolated into the markup as given; no escaping is
    applied here. The page asks the browser to reload every 60 seconds.
    """
    if timestamp:
        ts_display = f" · {timestamp}"
    else:
        ts_display = ""
    nav_markup = _nav_html(active_path)
    return f"""<!DOCTYPE html>
<html lang="en"><head>
<meta charset="utf-8">
<title>Argus - {title}</title>
<meta http-equiv="refresh" content="60">
<meta name="viewport" content="width=device-width, initial-scale=1">
{CHART_JS_IMPORTS}
<style>{SHARED_CSS}{extra_css}</style>
</head><body>
{nav_markup}
<div class="page-content">
<div class="page-header">
<h1>{title}</h1>
<div class="subtitle">{subtitle}{ts_display} · auto-refresh 60s</div>
</div>
{body_html}
</div>
<div class="footer">
Argus · Teleo Pipeline Diagnostics ·
<a href="/api/metrics">Metrics API</a> ·
<a href="/api/vital-signs">Vital Signs API</a> ·
<a href="/api/contributors">Contributors API</a>
</div>
<script>{SHARED_JS}</script>
{scripts}
</body></html>"""
|
||||
476
diagnostics/tier1_metrics.py
Normal file
476
diagnostics/tier1_metrics.py
Normal file
|
|
@ -0,0 +1,476 @@
|
|||
"""Tier 1 Metrics — The three numbers that matter most for knowledge production.
|
||||
|
||||
1. Extraction yield: claims merged / claims evaluated, per agent, per week
|
||||
2. Cost per merged claim: total spend / merged claims, per week
|
||||
3. Fix success rate by rejection tag: which rejection reasons are fixable vs terminal
|
||||
|
||||
These queries run against pipeline.db (read-only) and power the /api/yield,
|
||||
/api/cost-per-claim, and /api/fix-rates endpoints.
|
||||
|
||||
Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
|
||||
|
||||
def extraction_yield(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Extraction yield = merged / evaluated, per agent per calendar day.

    "Evaluated" counts every terminal evaluate-stage event in ``audit_log``
    (approved, changes_requested, domain_rejected, tier05_rejected);
    "merged" counts the approved ones only. The agent name is read from the
    ``$.agent`` field of the JSON ``detail`` column; rows without one are
    reported under ``"unknown"``.

    Args:
        conn: Connection to the pipeline database. Rows are indexed by
            column name, so the connection must use a name-addressable
            row factory such as ``sqlite3.Row``.
        days: Lookback window in days, counted back from now.

    Returns:
        {
            "days": 30,
            "daily": [{"day": "2026-03-28", "agent": "rio", "evaluated": 20,
                       "merged": 8, "yield": 0.4}, ...],
            "totals": [{"agent": "rio", "evaluated": 100, "merged": 40,
                        "yield": 0.4}, ...],
            "system": {"evaluated": 500, "merged": 200, "yield": 0.4}
        }
        ``daily`` is ordered newest day first, ``totals`` by merged count
        descending. A yield with zero evaluations is reported as 0.
    """
    def _yield_entry(row, **leading) -> dict:
        # One place for the NULL-safe counts and the rounded ratio.
        evaluated = row["evaluated"] or 0
        merged = row["merged"] or 0
        return {
            **leading,
            "agent": row["agent"] or "unknown",
            "evaluated": evaluated,
            "merged": merged,
            "yield": round(merged / evaluated, 3) if evaluated else 0,
        }

    window = (f"-{days}",)

    # Daily yield per agent (bucketed with date(timestamp)).
    per_day = conn.execute(
        """
        SELECT date(timestamp) as day,
               json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY day, agent
        ORDER BY day DESC, agent
        """,
        window,
    ).fetchall()
    daily_data = [_yield_entry(r, day=r["day"]) for r in per_day]

    # Per-agent totals over the same window.
    per_agent = conn.execute(
        """
        SELECT json_extract(detail, '$.agent') as agent,
               COUNT(*) as evaluated,
               SUM(CASE WHEN event = 'approved' THEN 1 ELSE 0 END) as merged
        FROM audit_log
        WHERE stage = 'evaluate'
          AND event IN ('approved', 'changes_requested', 'domain_rejected', 'tier05_rejected')
          AND timestamp > datetime('now', ? || ' days')
        GROUP BY agent
        ORDER BY merged DESC
        """,
        window,
    ).fetchall()
    totals_data = [_yield_entry(r) for r in per_agent]

    # The per-agent groups partition the filtered rows (NULL agents form
    # their own group), so the system total is just their sum — no third
    # scan of audit_log is needed.
    sys_ev = sum(t["evaluated"] for t in totals_data)
    sys_mg = sum(t["merged"] for t in totals_data)

    return {
        "days": days,
        "daily": daily_data,
        "totals": totals_data,
        "system": {
            "evaluated": sys_ev,
            "merged": sys_mg,
            "yield": round(sys_mg / sys_ev, 3) if sys_ev else 0,
        },
    }
|
||||
|
||||
|
||||
def cost_per_merged_claim(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Cost and compute per merged claim, per calendar day and per stage.

    Spend and token counts come from the ``costs`` table; merge counts come
    from ``prs`` rows with ``status = 'merged'``. A stage whose name contains
    ``:max`` is classed as subscription billing, everything else (including
    rows with no stage name) as API billing.

    Args:
        conn: Connection to the pipeline database. Rows are indexed by
            column name, so a name-addressable row factory such as
            ``sqlite3.Row`` is required.
        days: Lookback window in days, counted back from now.

    Returns:
        {
            "days": 30,
            "daily": [{"day": "2026-03-28", "actual_spend": 1.50,
                       "estimated_cost": 2.10, "merged": 8,
                       "cost_per_claim": 0.2625, "input_tokens": 50000,
                       "output_tokens": 5000, "total_tokens": 55000,
                       "tokens_per_claim": 6875}, ...],
            "by_stage": [{"stage": "eval_leo:openrouter", "api_cost": 1.50,
                          "estimated_cost": 1.50, "input_tokens": 300000,
                          "output_tokens": 50000, "calls": 100,
                          "billing": "api"}, ...],
            "system": {"actual_spend": 2.36, "estimated_cost": 3.10,
                       "merged": 80, "cost_per_claim": 0.0388,
                       "total_tokens": 1200000, "tokens_per_claim": 15000,
                       "subscription_tokens": 0, "api_tokens": 1200000,
                       "note": "..."}
        }
        ``cost_per_claim`` is based on ``estimated_cost``. Per-claim figures
        are ``None`` when nothing merged in the bucket.
    """
    window = (f"-{days}",)

    # Daily cost + tokens from the costs table.
    daily_cost = conn.execute(
        """
        SELECT date as day,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        window,
    ).fetchall()

    # Daily merge counts from the prs table.
    daily_merges = conn.execute(
        """
        SELECT date(merged_at) as day,
               COUNT(*) as merged
        FROM prs
        WHERE status = 'merged'
          AND merged_at > datetime('now', ? || ' days')
        GROUP BY day
        ORDER BY day DESC
        """,
        window,
    ).fetchall()

    # Merge both sources into one per-day view.
    merge_map = {r["day"]: r["merged"] for r in daily_merges}
    cost_map = {
        r["day"]: {
            "api_cost": r["api_cost"] or 0,
            "estimated_cost": r["estimated_cost"] or 0,
            "input_tokens": r["input_tokens"] or 0,
            "output_tokens": r["output_tokens"] or 0,
        }
        for r in daily_cost
    }
    no_cost = {"api_cost": 0, "estimated_cost": 0, "input_tokens": 0, "output_tokens": 0}

    daily_data = []
    # A day appears if it has either spend or merges.
    for day in sorted(merge_map.keys() | cost_map.keys(), reverse=True):
        c = cost_map.get(day, no_cost)
        merged = merge_map.get(day, 0) or 0
        day_tokens = c["input_tokens"] + c["output_tokens"]
        daily_data.append({
            "day": day,
            "actual_spend": round(c["api_cost"], 4),
            "estimated_cost": round(c["estimated_cost"], 4),
            "merged": merged,
            "cost_per_claim": round(c["estimated_cost"] / merged, 4) if merged else None,
            "input_tokens": c["input_tokens"],
            "output_tokens": c["output_tokens"],
            "total_tokens": day_tokens,
            "tokens_per_claim": round(day_tokens / merged) if merged else None,
        })

    # By stage with billing type (full window).
    by_stage = conn.execute(
        """
        SELECT stage,
               SUM(cost_usd) as api_cost,
               SUM(cost_estimate_usd) as estimated_cost,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(calls) as calls
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        window,
    ).fetchall()

    stage_data = []
    total_api_cost = 0
    total_estimated_cost = 0
    total_input = 0
    total_output = 0
    subscription_tokens = 0
    api_tokens = 0
    for r in by_stage:
        cost = r["api_cost"] or 0
        est = r["estimated_cost"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        calls = r["calls"] or 0
        stage_name = r["stage"]
        # :max suffix = subscription, :openrouter suffix = API.
        # A NULL stage cannot be substring-tested; treat it as API billing
        # instead of raising TypeError.
        is_subscription = bool(stage_name) and ":max" in stage_name
        billing = "subscription" if is_subscription else "api"
        total_api_cost += cost
        total_estimated_cost += est
        total_input += inp
        total_output += out
        if is_subscription:
            subscription_tokens += inp + out
        else:
            api_tokens += inp + out
        stage_data.append({
            "stage": stage_name,
            "api_cost": round(cost, 4),
            "estimated_cost": round(est, 4),
            "input_tokens": inp,
            "output_tokens": out,
            "calls": calls,
            "billing": billing,
        })

    # System totals.
    sys_merged = conn.execute(
        "SELECT COUNT(*) as n FROM prs WHERE status='merged' AND merged_at > datetime('now', ? || ' days')",
        window,
    ).fetchone()["n"] or 0

    total_tokens = total_input + total_output

    return {
        "days": days,
        "daily": daily_data,
        "by_stage": stage_data,
        "system": {
            "actual_spend": round(total_api_cost, 4),
            "estimated_cost": round(total_estimated_cost, 4),
            "merged": sys_merged,
            "cost_per_claim": round(total_estimated_cost / sys_merged, 4) if sys_merged else None,
            "total_tokens": total_tokens,
            "tokens_per_claim": round(total_tokens / sys_merged) if sys_merged else None,
            "subscription_tokens": subscription_tokens,
            "api_tokens": api_tokens,
            "note": "estimated_cost = API-rate equivalent for all calls (unified metric). actual_spend = real dollars charged to OpenRouter.",
        },
    }
|
||||
|
||||
|
||||
def fix_success_by_tag(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Fix success rate broken down by rejection reason.

    For each rejection tag: how many distinct PRs received it, how many of
    those are now merged (successful fix), how many are still open (in
    progress), and how many ended without a merge (terminal).

    Tags are read from the ``$.issues`` array and the PR number from the
    ``$.pr`` field of the JSON ``detail`` column on evaluate-stage rejection
    events. Current PR status comes from the ``prs`` table; a PR that is not
    found there is counted as terminal.

    Args:
        conn: Connection to the pipeline database. Rows are indexed by
            column name, so a name-addressable row factory such as
            ``sqlite3.Row`` is required.
        days: Lookback window in days, counted back from now.

    Returns:
        {
            "days": 30,
            "tags": [
                {
                    "tag": "insufficient_evidence",
                    "total": 50,
                    "fixed": 10,
                    "in_progress": 5,
                    "terminal": 35,
                    "fix_rate": 0.222,
                    "terminal_rate": 0.778
                }, ...
            ]
        }
        Tags are ordered by PR count, largest first. Rates are computed over
        resolved PRs only (fixed + terminal) and are ``None`` when a tag has
        no resolved PRs. ``tags`` is empty when no rejection in the window
        references a PR.
    """
    # Stay well under SQLite's bound-parameter limit (999 on older builds)
    # when looking up many PRs at once.
    lookup_chunk = 500
    in_progress_states = ("open", "validating", "reviewing", "merging")

    # One row per (rejection event, issue tag).
    rows = conn.execute(
        """
        SELECT value as tag,
               json_extract(al.detail, '$.pr') as pr_number
        FROM audit_log al, json_each(json_extract(al.detail, '$.issues'))
        WHERE al.stage = 'evaluate'
          AND al.event IN ('changes_requested', 'domain_rejected', 'tier05_rejected')
          AND al.timestamp > datetime('now', ? || ' days')
        """,
        (f"-{days}",),
    ).fetchall()

    # Collect unique PRs per tag; a tag is kept even if none of its events
    # carried a PR number.
    tag_prs: dict[str, set] = {}
    for r in rows:
        prs_for_tag = tag_prs.setdefault(r["tag"], set())
        if r["pr_number"] is not None:
            prs_for_tag.add(r["pr_number"])

    all_prs = set().union(*tag_prs.values())
    if not all_prs:
        # Covers both "no rejections" and "rejections without PR numbers".
        return {"days": days, "tags": []}

    # Fetch the current status of every referenced PR, in bounded batches.
    pr_list = list(all_prs)
    status_map = {}
    for start in range(0, len(pr_list), lookup_chunk):
        batch = pr_list[start:start + lookup_chunk]
        placeholders = ",".join("?" for _ in batch)
        for r in conn.execute(
            f"SELECT number, status FROM prs WHERE number IN ({placeholders})",
            batch,
        ):
            status_map[r["number"]] = r["status"]

    # Compute per-tag outcomes.
    tag_data = []
    for tag, prs in sorted(tag_prs.items(), key=lambda item: -len(item[1])):
        fixed = 0
        in_progress = 0
        terminal = 0
        for pr in prs:
            st = status_map.get(pr, "unknown")
            if st == "merged":
                fixed += 1
            elif st in in_progress_states:
                in_progress += 1
            else:
                # closed, zombie, conflict, unknown
                terminal += 1

        # Fix rate excludes in-progress (only counts resolved PRs).
        resolved = fixed + terminal
        tag_data.append({
            "tag": tag,
            "total": len(prs),
            "fixed": fixed,
            "in_progress": in_progress,
            "terminal": terminal,
            "fix_rate": round(fixed / resolved, 3) if resolved else None,
            "terminal_rate": round(terminal / resolved, 3) if resolved else None,
        })

    return {"days": days, "tags": tag_data}
|
||||
|
||||
|
||||
def compute_profile(conn: "sqlite3.Connection", days: int = 30) -> dict:
    """Compute profile — Max subscription telemetry alongside API usage.

    Aggregates the ``costs`` table per (stage, model) over the lookback
    window and surfaces cache hit rates, latency, cost estimates
    (API-equivalent) and a call/token breakdown by billing type. A stage
    whose name contains ``:max`` is classed as subscription billing;
    everything else (including rows with no stage name) is classed as API.

    Args:
        conn: Connection to the pipeline database. Rows are indexed by
            column name, so a name-addressable row factory such as
            ``sqlite3.Row`` is required.
        days: Lookback window in days, counted back from now.

    Returns:
        A dict with keys ``days``, ``by_stage`` (one entry per stage/model,
        ordered by total tokens descending), ``cache``, ``latency``,
        ``subscription_estimate`` and ``system``.
    """
    rows = conn.execute(
        """
        SELECT stage, model,
               SUM(calls) as calls,
               SUM(input_tokens) as input_tokens,
               SUM(output_tokens) as output_tokens,
               SUM(cost_usd) as api_cost,
               SUM(duration_ms) as duration_ms,
               SUM(cache_read_tokens) as cache_read_tokens,
               SUM(cache_write_tokens) as cache_write_tokens,
               SUM(cost_estimate_usd) as cost_estimate_usd
        FROM costs
        WHERE date > date('now', ? || ' days')
        GROUP BY stage, model
        ORDER BY SUM(input_tokens + output_tokens) DESC
        """,
        (f"-{days}",),
    ).fetchall()

    stage_data = []
    total_calls = 0
    total_tokens = 0
    total_duration = 0
    total_cache_read = 0
    total_cache_write = 0
    api_calls = 0
    sub_calls = 0
    api_spend = 0.0
    sub_estimate = 0.0
    sub_input_tokens = 0

    for r in rows:
        # SUM() yields NULL when every input is NULL; normalise to 0.
        calls = r["calls"] or 0
        inp = r["input_tokens"] or 0
        out = r["output_tokens"] or 0
        dur = r["duration_ms"] or 0
        cr = r["cache_read_tokens"] or 0
        cw = r["cache_write_tokens"] or 0
        cost = r["api_cost"] or 0
        est = r["cost_estimate_usd"] or 0
        stage_name = r["stage"]
        # A NULL stage cannot be substring-tested; treat it as API billing
        # instead of raising TypeError.
        is_subscription = bool(stage_name) and ":max" in stage_name
        billing = "subscription" if is_subscription else "api"

        total_calls += calls
        total_tokens += inp + out
        total_duration += dur
        total_cache_read += cr
        total_cache_write += cw

        if is_subscription:
            sub_calls += calls
            sub_estimate += est
            sub_input_tokens += inp
        else:
            api_calls += calls
            api_spend += cost

        stage_data.append({
            "stage": stage_name,
            "model": r["model"],
            "calls": calls,
            "input_tokens": inp,
            "output_tokens": out,
            "total_tokens": inp + out,
            "duration_ms": dur,
            "avg_latency_ms": round(dur / calls) if calls else 0,
            "cache_read_tokens": cr,
            "cache_write_tokens": cw,
            "cache_hit_rate": round(cr / (cr + inp), 3) if (cr + inp) else 0,
            "api_cost": round(cost, 4),
            "cost_estimate_usd": round(est, 4),
            "billing": billing,
        })

    # Cache summary (only meaningful for subscription/Max calls).
    # NOTE(review): cache read/write totals span every row while the input
    # tokens in the denominator are subscription-only, and cache writes are
    # included here but not in the per-stage rate — confirm this is intended.
    total_cacheable = total_cache_read + total_cache_write + sub_input_tokens
    cache_hit_rate = round(total_cache_read / total_cacheable, 3) if total_cacheable else 0

    return {
        "days": days,
        "by_stage": stage_data,
        "cache": {
            "read_tokens": total_cache_read,
            "write_tokens": total_cache_write,
            "hit_rate": cache_hit_rate,
            "note": "Cache hits are prompt tokens served from cache (cheaper/faster)",
        },
        "latency": {
            "total_ms": total_duration,
            "avg_ms_per_call": round(total_duration / total_calls) if total_calls else 0,
            "note": "Wall-clock time including network. Only populated for Claude Max calls.",
        },
        "subscription_estimate": {
            "total_cost_usd": round(sub_estimate, 4),
            "note": "What subscription calls would cost at API rates. Actual cost: $0 (flat-rate Max plan).",
        },
        "system": {
            "total_calls": total_calls,
            "total_tokens": total_tokens,
            "api_calls": api_calls,
            "subscription_calls": sub_calls,
            "api_spend": round(api_spend, 4),
            "subscription_estimate": round(sub_estimate, 4),
            "cache_hit_rate": cache_hit_rate,
        },
    }
|
||||
57
diagnostics/tier1_routes.py
Normal file
57
diagnostics/tier1_routes.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
"""Tier 1 Metrics — API routes for Argus dashboard.
|
||||
|
||||
Four endpoints:
|
||||
GET /api/yield — extraction yield per agent per day
|
||||
GET /api/cost-per-claim — cost per merged claim per day + stage breakdown
|
||||
GET /api/fix-rates — fix success rate by rejection tag
|
||||
GET /api/compute-profile — full compute telemetry (cache, latency, cost estimates)
|
||||
|
||||
All accept ?days=N (default 30) to control lookback window.
|
||||
|
||||
Owner: Argus <69AF7290-758F-464B-B472-04AFCA4AB340>
|
||||
"""
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
from tier1_metrics import cost_per_merged_claim, compute_profile, extraction_yield, fix_success_by_tag
|
||||
|
||||
|
||||
def _parse_days(request, default=30):
|
||||
"""Parse and clamp ?days= parameter. Returns 1..365."""
|
||||
try:
|
||||
days = int(request.query.get("days", str(default)))
|
||||
except (ValueError, TypeError):
|
||||
days = default
|
||||
return max(1, min(days, 365))
|
||||
|
||||
|
||||
async def handle_yield(request):
    """Return the ``extraction_yield`` report for the requested window as JSON."""
    db = request.app["_get_conn"]()
    window_days = _parse_days(request)
    payload = extraction_yield(db, window_days)
    return web.json_response(payload)
|
||||
|
||||
|
||||
async def handle_cost_per_claim(request):
    """Return the ``cost_per_merged_claim`` report for the requested window as JSON."""
    db = request.app["_get_conn"]()
    window_days = _parse_days(request)
    payload = cost_per_merged_claim(db, window_days)
    return web.json_response(payload)
|
||||
|
||||
|
||||
async def handle_fix_rates(request):
    """Return the ``fix_success_by_tag`` report for the requested window as JSON."""
    db = request.app["_get_conn"]()
    window_days = _parse_days(request)
    payload = fix_success_by_tag(db, window_days)
    return web.json_response(payload)
|
||||
|
||||
|
||||
async def handle_compute_profile(request):
    """Return the ``compute_profile`` report for the requested window as JSON."""
    db = request.app["_get_conn"]()
    window_days = _parse_days(request)
    payload = compute_profile(db, window_days)
    return web.json_response(payload)
|
||||
|
||||
|
||||
def register_tier1_routes(app: web.Application, get_conn):
    """Attach the Tier 1 GET endpoints to ``app``.

    ``get_conn`` is stored on the application under the ``"_get_conn"`` key;
    each handler calls it to obtain its database connection.
    """
    app["_get_conn"] = get_conn
    endpoints = (
        ("/api/yield", handle_yield),
        ("/api/cost-per-claim", handle_cost_per_claim),
        ("/api/fix-rates", handle_fix_rates),
        ("/api/compute-profile", handle_compute_profile),
    )
    for path, handler in endpoints:
        app.router.add_get(path, handler)
|
||||
629
diagnostics/vitality.py
Normal file
629
diagnostics/vitality.py
Normal file
|
|
@ -0,0 +1,629 @@
|
|||
"""Agent Vitality Diagnostics — data collection and schema.
|
||||
|
||||
Records daily vitality snapshots per agent across 10 dimensions.
|
||||
Designed as the objective function for agent "aliveness" ranking.
|
||||
|
||||
Owner: Ship (data collection) + Argus (storage, API, dashboard)
|
||||
Data sources: pipeline.db (read-only), claim-index API, agent-state filesystem, review_records
|
||||
|
||||
Dimension keys (agreed with Leo 2026-04-08):
|
||||
knowledge_output, knowledge_quality, contributor_engagement,
|
||||
review_performance, spend_efficiency, autonomy,
|
||||
infrastructure_health, social_reach, capital, external_impact
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Module logger; used for schema and claim-index fetch messages.
logger = logging.getLogger("vitality")

# Known domain agents and their primary domains.
# An empty list marks an agent with no knowledge domain of its own.
AGENT_DOMAINS = {
    "rio": ["internet-finance"],
    "theseus": ["collective-intelligence", "living-agents"],
    "astra": ["space-development", "energy", "manufacturing", "robotics"],
    "vida": ["health"],
    "clay": ["entertainment", "cultural-dynamics"],
    "leo": ["grand-strategy", "teleohumanity"],
    "hermes": [],       # communications, no domain
    "rhea": [],         # infrastructure ops, no domain
    "ganymede": [],     # code review, no domain
    "epimetheus": [],   # pipeline, no domain
    "oberon": [],       # dashboard, no domain
    "argus": [],        # diagnostics, no domain
    "ship": [],         # engineering, no domain
}

# Agent file path prefixes — for matching claims by location, not just domain field.
# Handles claims in core/ and foundations/ that may not have a standard domain field
# in the claim-index (domain derived from directory path).
# NOTE(review): theseus lists "domains/ai-alignment/" here but has no
# "ai-alignment" entry in AGENT_DOMAINS — confirm whether that is intended.
AGENT_PATHS = {
    "rio": ["domains/internet-finance/"],
    "theseus": ["domains/ai-alignment/", "core/living-agents/", "core/collective-intelligence/",
                "foundations/collective-intelligence/"],
    "astra": ["domains/space-development/", "domains/energy/",
              "domains/manufacturing/", "domains/robotics/"],
    "vida": ["domains/health/"],
    "clay": ["domains/entertainment/", "foundations/cultural-dynamics/"],
    "leo": ["core/grand-strategy/", "core/teleohumanity/", "core/mechanisms/",
            "core/living-capital/", "foundations/teleological-economics/",
            "foundations/critical-systems/"],
}

# Every agent name known to this module, in AGENT_DOMAINS declaration order.
ALL_AGENTS = list(AGENT_DOMAINS.keys())

# Agent-state directory (VPS filesystem).
# Overridable through the AGENT_STATE_DIR environment variable.
AGENT_STATE_DIR = Path(os.environ.get(
    "AGENT_STATE_DIR", "/opt/teleo-eval/agent-state"
))
|
||||
|
||||
# Idempotent schema for the vitality_snapshots table and its two indexes.
# One row is one (agent, dimension, metric) reading at a given recorded_at;
# the UNIQUE constraint rejects a second identical reading for the same instant.
MIGRATION_SQL = """
CREATE TABLE IF NOT EXISTS vitality_snapshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
agent_name TEXT NOT NULL,
dimension TEXT NOT NULL,
metric TEXT NOT NULL,
value REAL NOT NULL DEFAULT 0,
unit TEXT NOT NULL DEFAULT '',
source TEXT,
recorded_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(agent_name, dimension, metric, recorded_at)
);
CREATE INDEX IF NOT EXISTS idx_vitality_agent_time
ON vitality_snapshots(agent_name, recorded_at);
CREATE INDEX IF NOT EXISTS idx_vitality_dimension
ON vitality_snapshots(dimension, recorded_at);
"""

# Add source column if missing (idempotent upgrade from v1 schema).
# A table freshly created by MIGRATION_SQL already has the column, so this
# statement is expected to fail there; ensure_schema tolerates that failure.
UPGRADE_SQL = """
ALTER TABLE vitality_snapshots ADD COLUMN source TEXT;
"""
|
||||
|
||||
|
||||
def ensure_schema(db_path: str):
    """Create or upgrade the vitality_snapshots table in the given database.

    Runs ``MIGRATION_SQL`` (create-if-missing), then ``UPGRADE_SQL`` to add
    the ``source`` column to tables created before that column existed.

    Args:
        db_path: Filesystem path of the SQLite database to open read-write.

    Raises:
        sqlite3.OperationalError: If the upgrade fails for any reason other
            than the column already being present, or if the migration
            script itself fails.
    """
    conn = sqlite3.connect(db_path, timeout=30)
    try:
        conn.executescript(MIGRATION_SQL)
        try:
            conn.execute(UPGRADE_SQL)
        except sqlite3.OperationalError as exc:
            # "duplicate column name: source" is the expected outcome on an
            # up-to-date table. Anything else (locked database, I/O error,
            # missing table) is a real failure and must not be reported as
            # "schema ensured".
            if "duplicate column name" not in str(exc).lower():
                raise
        conn.commit()
        logger.info("vitality_snapshots schema ensured")
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _fetch_claim_index(url: str = "http://localhost:8080/claim-index") -> dict | None:
    """Fetch claim-index from pipeline health API.

    Issues a GET with ``Accept: application/json`` and a 10 second timeout
    and returns the decoded JSON body. Any failure (bad URL, network error,
    invalid JSON) is logged as a warning and reported as ``None``.
    """
    try:
        request = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(request, timeout=10) as response:
            raw_body = response.read()
            return json.loads(raw_body)
    except Exception as exc:
        logger.warning("claim-index fetch failed: %s", exc)
    return None
|
||||
|
||||
|
||||
def _ro_conn(db_path: str) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=30)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 1: knowledge_output — "How much has this agent produced?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_knowledge_output(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Report merged-claim count, distinct merged domains, and PRs opened in 7 days.

    All three figures are counted from ``prs`` rows whose ``agent`` column
    equals ``agent``. Returns the metrics in that order.
    """
    def _count(sql: str) -> int:
        # Every query here takes the agent as its only parameter and
        # returns a single row with a ``cnt`` column.
        return conn.execute(sql, (agent,)).fetchone()["cnt"]

    claims_merged = _count(
        "SELECT COUNT(*) as cnt FROM prs WHERE agent = ? AND status = 'merged'"
    )
    domains_contributed = _count(
        "SELECT COUNT(DISTINCT domain) as cnt FROM prs "
        "WHERE agent = ? AND domain IS NOT NULL AND status = 'merged'"
    )
    prs_last_week = _count(
        "SELECT COUNT(*) as cnt FROM prs WHERE agent = ? AND created_at > datetime('now', '-7 days')"
    )

    return [
        {"metric": "claims_merged", "value": claims_merged, "unit": "claims"},
        {"metric": "domains_contributed", "value": domains_contributed, "unit": "domains"},
        {"metric": "prs_7d", "value": prs_last_week, "unit": "PRs"},
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 2: knowledge_quality — "How good is the output?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_knowledge_quality(
    conn: sqlite3.Connection, claim_index: dict | None, agent: str
) -> list[dict]:
    """Evidence density, challenge rate, cross-domain links, domain coverage.

    Args:
        conn: Connection to the pipeline database; rows are indexed by
            column name, so a name-addressable row factory is required.
        claim_index: Decoded claim-index payload, or ``None`` when it could
            not be fetched. The code reads its ``claims`` list (entries with
            ``domain``, ``file``, ``incoming_count`` and ``outgoing_links``)
            and its ``domains`` mapping of domain name to claim count.
        agent: Agent name; looked up in ``AGENT_DOMAINS`` and ``AGENT_PATHS``.

    Returns:
        Metrics in this order: ``challenge_rate``, ``activity_breadth``,
        ``evidence_density``, ``cross_domain_links``, ``domain_coverage``.
        The last three are 0 when there is no claim index or the agent has
        neither domains nor path prefixes.
    """
    metrics = []
    agent_domains = AGENT_DOMAINS.get(agent, [])

    # Challenge rate = challenge PRs / total PRs
    rows = conn.execute(
        "SELECT commit_type, COUNT(*) as cnt FROM prs "
        "WHERE agent = ? AND commit_type IS NOT NULL GROUP BY commit_type",
        (agent,),
    ).fetchall()
    type_counts = {r["commit_type"]: r["cnt"] for r in rows}
    total = sum(type_counts.values())
    challenge_rate = type_counts.get("challenge", 0) / total if total > 0 else 0
    metrics.append({"metric": "challenge_rate", "value": round(challenge_rate, 4), "unit": "ratio"})

    # Activity breadth (distinct commit types)
    metrics.append({"metric": "activity_breadth", "value": len(type_counts), "unit": "types"})

    # Evidence density + cross-domain links from claim-index
    # Match by domain field OR file path prefix (catches core/, foundations/ claims)
    # str.startswith takes a tuple of prefixes; an empty tuple never matches.
    agent_paths = tuple(AGENT_PATHS.get(agent, []))
    if claim_index and (agent_domains or agent_paths):
        # `or` fallbacks tolerate fields that are present but null.
        claims = claim_index.get("claims") or []
        domains_data = claim_index.get("domains") or {}
        agent_claims = [
            c for c in claims
            if c.get("domain") in agent_domains
            or (c.get("file") or "").startswith(agent_paths)
        ]
        total_claims = len(agent_claims)

        # Evidence density: claims with incoming links / total claims
        linked = sum(1 for c in agent_claims if (c.get("incoming_count") or 0) > 0)
        density = linked / total_claims if total_claims > 0 else 0
        metrics.append({"metric": "evidence_density", "value": round(density, 4), "unit": "ratio"})

        # Cross-domain links: outgoing links whose text mentions a domain
        # that is not one of the agent's own. The foreign-domain list does
        # not depend on the link, so build it once.
        foreign_domains = [d for d in domains_data if d not in agent_domains]
        cross_domain = sum(
            1 for c in agent_claims
            for link in (c.get("outgoing_links") or [])
            if any(d in link for d in foreign_domains)
        )
        metrics.append({"metric": "cross_domain_links", "value": cross_domain, "unit": "links"})

        # Domain coverage: agent's claims / average domain size, capped at 1.0
        agent_claim_count = sum(domains_data.get(d, 0) for d in agent_domains)
        avg_domain_size = (sum(domains_data.values()) / len(domains_data)) if domains_data else 1
        coverage = min(agent_claim_count / avg_domain_size, 1.0) if avg_domain_size > 0 else 0
        metrics.append({"metric": "domain_coverage", "value": round(coverage, 4), "unit": "ratio"})
    else:
        metrics.append({"metric": "evidence_density", "value": 0, "unit": "ratio"})
        metrics.append({"metric": "cross_domain_links", "value": 0, "unit": "links"})
        metrics.append({"metric": "domain_coverage", "value": 0, "unit": "ratio"})

    return metrics
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 3: contributor_engagement — "Who contributes to this agent's domain?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_contributor_engagement(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Count distinct non-empty ``submitted_by`` values on this agent's PRs."""
    query = (
        "SELECT COUNT(DISTINCT submitted_by) as cnt FROM prs "
        "WHERE agent = ? AND submitted_by IS NOT NULL AND submitted_by != ''"
    )
    submitters = conn.execute(query, (agent,)).fetchone()["cnt"]
    return [{"metric": "unique_submitters", "value": submitters, "unit": "contributors"}]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 4: review_performance — "How good is the evaluator feedback loop?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_review_performance(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Summarise review outcomes for this agent's PRs from ``review_records``.

    When the ``review_records`` table is absent, returns zeroed
    ``approval_rate`` and ``total_reviews`` only. Otherwise returns
    ``total_reviews``, ``approval_rate`` (approved plus
    approved-with-changes over all reviews), the three outcome counts, and
    one ``rejection_<reason>`` metric per rejection reason seen in the last
    30 days, most frequent first. Agent names are matched case-insensitively.

    NOTE(review): the joins use ``prs.pr_number`` while other queries on the
    same table in this codebase select ``prs.number`` — confirm the column name.
    """
    # Databases that predate review tracking have no such table.
    has_reviews = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='review_records'"
    ).fetchone()
    if not has_reviews:
        return [
            {"metric": "approval_rate", "value": 0, "unit": "ratio"},
            {"metric": "total_reviews", "value": 0, "unit": "reviews"},
        ]

    # Outcome tally across all of this agent's reviews (joined through prs).
    tally = conn.execute(
        "SELECT COUNT(*) as total, "
        "SUM(CASE WHEN r.outcome = 'approved' THEN 1 ELSE 0 END) as approved, "
        "SUM(CASE WHEN r.outcome = 'approved-with-changes' THEN 1 ELSE 0 END) as with_changes, "
        "SUM(CASE WHEN r.outcome = 'rejected' THEN 1 ELSE 0 END) as rejected "
        "FROM review_records r "
        "JOIN prs p ON r.pr_number = p.pr_number "
        "WHERE LOWER(p.agent) = LOWER(?)",
        (agent,),
    ).fetchone()
    # SUM() is NULL when no rows matched; normalise to 0.
    n_reviews = tally["total"] or 0
    n_clean = tally["approved"] or 0
    n_with_changes = tally["with_changes"] or 0
    n_rejected = tally["rejected"] or 0
    if n_reviews > 0:
        approval_rate = (n_clean + n_with_changes) / n_reviews
    else:
        approval_rate = 0

    metrics = [
        {"metric": "total_reviews", "value": n_reviews, "unit": "reviews"},
        {"metric": "approval_rate", "value": round(approval_rate, 4), "unit": "ratio"},
        {"metric": "approved", "value": n_clean, "unit": "reviews"},
        {"metric": "approved_with_changes", "value": n_with_changes, "unit": "reviews"},
        {"metric": "rejected", "value": n_rejected, "unit": "reviews"},
    ]

    # Top rejection reasons (last 30 days)
    reason_rows = conn.execute(
        "SELECT r.rejection_reason, COUNT(*) as cnt FROM review_records r "
        "JOIN prs p ON r.pr_number = p.pr_number "
        "WHERE LOWER(p.agent) = LOWER(?) AND r.outcome = 'rejected' "
        "AND r.rejection_reason IS NOT NULL "
        "AND r.review_date > datetime('now', '-30 days') "
        "GROUP BY r.rejection_reason ORDER BY cnt DESC",
        (agent,),
    ).fetchall()
    metrics.extend(
        {
            "metric": f"rejection_{reason['rejection_reason']}",
            "value": reason["cnt"],
            "unit": "rejections",
        }
        for reason in reason_rows
    )

    return metrics
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 5: spend_efficiency — "What does it cost per merged claim?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_spend_efficiency(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Report pipeline cost per merged claim and response-generation spend.

    Returns, in order: ``total_pipeline_cost`` and ``cost_per_merged_claim``
    (from ``cost_usd`` on the agent's merged ``prs`` rows), then
    ``response_cost_total``, ``total_responses`` and ``response_cost_24h``
    (from ``generation_cost`` in ``response_audit``). Dollar figures are
    rounded to four decimals; cost per claim is 0 when nothing has merged.
    """
    params = (agent,)

    # Pipeline cost attributed to this agent (from prs.cost_usd)
    pipeline = conn.execute(
        "SELECT COALESCE(SUM(cost_usd), 0) as cost, COUNT(*) as merged "
        "FROM prs WHERE agent = ? AND status = 'merged'",
        params,
    ).fetchone()
    pipeline_cost = pipeline["cost"] or 0
    merged_count = pipeline["merged"] or 0
    if merged_count > 0:
        per_claim = pipeline_cost / merged_count
    else:
        per_claim = 0

    # Response audit costs (Telegram bot) — per-agent
    responses = conn.execute(
        "SELECT COALESCE(SUM(generation_cost), 0) as cost, COUNT(*) as cnt "
        "FROM response_audit WHERE agent = ?",
        params,
    ).fetchone()

    # 24h spend snapshot
    last_day = conn.execute(
        "SELECT COALESCE(SUM(generation_cost), 0) as cost "
        "FROM response_audit WHERE agent = ? AND timestamp > datetime('now', '-24 hours')",
        params,
    ).fetchone()

    return [
        {"metric": "total_pipeline_cost", "value": round(pipeline_cost, 4), "unit": "USD"},
        {"metric": "cost_per_merged_claim", "value": round(per_claim, 4), "unit": "USD"},
        {"metric": "response_cost_total", "value": round(responses["cost"], 4), "unit": "USD"},
        {"metric": "total_responses", "value": responses["cnt"], "unit": "responses"},
        {"metric": "response_cost_24h", "value": round(last_day["cost"], 4), "unit": "USD"},
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 6: autonomy — "How independently does this agent act?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_autonomy(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Count an agent's recent responses and the days it opened PRs.

    Args:
        conn: SQLite connection whose rows can be indexed by column name.
        agent: Agent name matched against the ``agent`` column of both tables.

    Returns:
        Two metric dicts: ``autonomous_responses_24h`` (rows in
        ``response_audit`` from the last 24 hours) and ``active_days_7d``
        (distinct ``created_at`` dates in ``prs`` from the last 7 days).
    """
    params = (agent,)

    responses = conn.execute(
        "SELECT COUNT(*) as cnt FROM response_audit "
        "WHERE agent = ? AND timestamp > datetime('now', '-24 hours')",
        params,
    ).fetchone()

    activity = conn.execute(
        "SELECT COUNT(DISTINCT date(created_at)) as days FROM prs "
        "WHERE agent = ? AND created_at > datetime('now', '-7 days')",
        params,
    ).fetchone()

    return [
        {"metric": "autonomous_responses_24h", "value": responses["cnt"], "unit": "actions"},
        {"metric": "active_days_7d", "value": activity["days"], "unit": "days"},
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimension 7: infrastructure_health — "Is the agent's machinery working?"
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_infrastructure_health(conn: sqlite3.Connection, agent: str) -> list[dict]:
    """Circuit breakers, PR success rate, agent-state liveness.

    Args:
        conn: SQLite connection whose rows can be indexed by column name.
        agent: Agent name; matched as a substring of ``circuit_breakers.name``,
            exactly against ``prs.agent``, and used as the directory name
            under ``AGENT_STATE_DIR``.

    Returns:
        ``open_circuit_breakers`` and ``merge_rate_7d`` always; the three
        ``sessions_*`` metrics only when ``metrics.json`` exists, parses, and
        has the expected object shape.
    """
    metrics = []

    # Circuit breakers: anything not explicitly "closed" counts as open.
    rows = conn.execute(
        "SELECT name, state FROM circuit_breakers WHERE name LIKE ?",
        (f"%{agent}%",),
    ).fetchall()
    open_breakers = sum(1 for r in rows if r["state"] != "closed")
    metrics.append({"metric": "open_circuit_breakers", "value": open_breakers, "unit": "breakers"})

    # PR success rate last 7 days
    row = conn.execute(
        "SELECT COUNT(*) as total, "
        "SUM(CASE WHEN status='merged' THEN 1 ELSE 0 END) as merged "
        "FROM prs WHERE agent = ? AND created_at > datetime('now', '-7 days')",
        (agent,),
    ).fetchone()
    total = row["total"]
    # SUM() is NULL when no rows match, so only divide when there are PRs.
    rate = row["merged"] / total if total > 0 else 0
    metrics.append({"metric": "merge_rate_7d", "value": round(rate, 4), "unit": "ratio"})

    # Agent-state liveness (read metrics.json from filesystem)
    state_file = AGENT_STATE_DIR / agent / "metrics.json"
    if state_file.exists():
        try:
            with open(state_file, encoding="utf-8") as f:
                state = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Failed to read agent-state for %s: %s", agent, e)
        else:
            lifetime = state.get("lifetime", {}) if isinstance(state, dict) else None
            if isinstance(lifetime, dict):
                for key in ("sessions_total", "sessions_timeout", "sessions_error"):
                    metrics.append({
                        "metric": key,
                        "value": lifetime.get(key, 0),
                        "unit": "sessions",
                    })
            else:
                # Valid JSON of the wrong shape must not take down the
                # breaker and merge-rate metrics already collected above.
                logger.warning(
                    "Failed to read agent-state for %s: %s",
                    agent, "unexpected JSON structure",
                )

    return metrics
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimensions 8-10: Stubs (no data sources yet)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collect_social_reach(agent: str) -> list[dict]:
    """Return placeholder social metrics (stub until X API accounts are active).

    ``agent`` is accepted for signature parity with the other collectors and
    is not read; every value is 0.
    """
    placeholders = (
        ("followers", "followers"),
        ("impressions_7d", "impressions"),
        ("engagement_rate", "ratio"),
    )
    return [{"metric": name, "value": 0, "unit": unit} for name, unit in placeholders]
|
||||
|
||||
|
||||
def collect_capital(agent: str) -> list[dict]:
    """Return placeholder capital metrics (stub until treasury/revenue tracking exists).

    ``agent`` is not read; both values are 0 and both units are USD.
    """
    return [{"metric": name, "value": 0, "unit": "USD"} for name in ("aum", "treasury")]
|
||||
|
||||
|
||||
def collect_external_impact(agent: str) -> list[dict]:
    """Return placeholder external-impact metrics (stub until manual tracking exists).

    ``agent`` is not read; every value is 0.
    """
    placeholders = (
        ("decisions_informed", "decisions"),
        ("deals_sourced", "deals"),
    )
    return [{"metric": name, "value": 0, "unit": unit} for name, unit in placeholders]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Dimension key -> collector. Every entry is invoked positionally as
# collector(conn, claim_index, agent); the lambdas drop whichever arguments
# the wrapped collector does not accept.
DIMENSION_MAP = {
    "knowledge_output": lambda conn, ci, agent: collect_knowledge_output(conn, agent),
    # Registered directly, so it receives all three arguments.
    "knowledge_quality": collect_knowledge_quality,
    "contributor_engagement": lambda conn, ci, agent: collect_contributor_engagement(conn, agent),
    "review_performance": lambda conn, ci, agent: collect_review_performance(conn, agent),
    "spend_efficiency": lambda conn, ci, agent: collect_spend_efficiency(conn, agent),
    "autonomy": lambda conn, ci, agent: collect_autonomy(conn, agent),
    "infrastructure_health": lambda conn, ci, agent: collect_infrastructure_health(conn, agent),
    # Stub dimensions: these collectors take only the agent name.
    "social_reach": lambda conn, ci, agent: collect_social_reach(agent),
    "capital": lambda conn, ci, agent: collect_capital(agent),
    "external_impact": lambda conn, ci, agent: collect_external_impact(agent),
}
|
||||
|
||||
|
||||
def collect_all_for_agent(
    db_path: str,
    agent: str,
    claim_index_url: str = "http://localhost:8080/claim-index",
) -> dict:
    """Run every collector in ``DIMENSION_MAP`` for one agent.

    The claim index is fetched once and one read-only connection is shared by
    all collectors. A collector that raises is logged and contributes an empty
    list, so one broken dimension does not abort the rest.

    Returns:
        ``{dimension_key: [metric dicts]}`` with one key per map entry.
    """
    claim_index = _fetch_claim_index(claim_index_url)
    conn = _ro_conn(db_path)
    collected = {}
    try:
        for dim_key, collector in DIMENSION_MAP.items():
            try:
                dim_metrics = collector(conn, claim_index, agent)
            except Exception as e:
                logger.error("collector %s failed for %s: %s", dim_key, agent, e)
                dim_metrics = []
            collected[dim_key] = dim_metrics
    finally:
        conn.close()
    return collected
|
||||
|
||||
|
||||
def collect_system_aggregate(
    db_path: str,
    claim_index_url: str = "http://localhost:8080/claim-index",
) -> dict:
    """System-level aggregate vitality metrics.

    Args:
        db_path: Path of the pipeline database, opened through ``_ro_conn``.
        claim_index_url: URL passed to ``_fetch_claim_index``.

    Returns:
        ``{dimension: [metric dicts]}``. ``knowledge_quality`` is present only
        when a claim index was fetched; the other dimensions are always set.
    """
    claim_index = _fetch_claim_index(claim_index_url)
    conn = _ro_conn(db_path)
    try:
        metrics = {}

        # Knowledge totals. All three use .get() so a partial index degrades
        # to zeros instead of raising KeyError.
        total_claims = claim_index.get("total_claims", 0) if claim_index else 0
        orphan_ratio = claim_index.get("orphan_ratio", 0) if claim_index else 0
        domain_count = len(claim_index.get("domains", {})) if claim_index else 0

        metrics["knowledge_output"] = [
            {"metric": "total_claims", "value": total_claims, "unit": "claims"},
            {"metric": "total_domains", "value": domain_count, "unit": "domains"},
            {"metric": "orphan_ratio", "value": round(orphan_ratio, 4), "unit": "ratio"},
        ]

        # Cross-domain citation rate
        if claim_index:
            claims = claim_index.get("claims", [])
            total_links = sum(c.get("outgoing_count", 0) for c in claims)
            # Read each claim's match fields once, not once per (link, claim) pair.
            targets = [(c.get("stem"), c.get("file", ""), c.get("domain")) for c in claims]
            cross_domain = 0
            for c in claims:
                src_domain = c.get("domain")
                for link in c.get("outgoing_links", []):
                    # An empty or non-string link would raise or match every file.
                    if not isinstance(link, str) or not link:
                        continue
                    suffix = link + ".md"
                    for stem, file_path, domain in targets:
                        if domain == src_domain:
                            continue
                        # A missing stem used to raise TypeError; an empty one
                        # is a substring of every link and matched everything.
                        stem_hit = isinstance(stem, str) and stem != "" and stem in link
                        file_hit = isinstance(file_path, str) and file_path.endswith(suffix)
                        if stem_hit or file_hit:
                            cross_domain += 1
            # NOTE(review): one link can match several claims, so the numerator
            # can exceed total_links and the ratio can exceed 1 — confirm intended.
            metrics["knowledge_quality"] = [
                {"metric": "cross_domain_citation_rate",
                 "value": round(cross_domain / max(total_links, 1), 4),
                 "unit": "ratio"},
            ]

        # Pipeline throughput
        row = conn.execute(
            "SELECT COUNT(*) as merged FROM prs "
            "WHERE status='merged' AND merged_at > datetime('now', '-24 hours')"
        ).fetchone()
        row2 = conn.execute("SELECT COUNT(*) as total FROM sources").fetchone()
        row3 = conn.execute(
            "SELECT COUNT(*) as pending FROM prs "
            "WHERE status NOT IN ('merged','rejected','closed')"
        ).fetchone()

        metrics["infrastructure_health"] = [
            {"metric": "prs_merged_24h", "value": row["merged"], "unit": "PRs/day"},
            {"metric": "total_sources", "value": row2["total"], "unit": "sources"},
            {"metric": "queue_depth", "value": row3["pending"], "unit": "PRs"},
        ]

        # Total spend
        row = conn.execute(
            "SELECT COALESCE(SUM(cost_usd), 0) as cost "
            "FROM costs WHERE date > date('now', '-1 day')"
        ).fetchone()
        row2 = conn.execute(
            "SELECT COALESCE(SUM(generation_cost), 0) as cost FROM response_audit "
            "WHERE timestamp > datetime('now', '-24 hours')"
        ).fetchone()
        metrics["spend_efficiency"] = [
            {"metric": "pipeline_cost_24h", "value": round(row["cost"], 4), "unit": "USD"},
            {"metric": "response_cost_24h", "value": round(row2["cost"], 4), "unit": "USD"},
            {"metric": "total_cost_24h",
             "value": round(row["cost"] + row2["cost"], 4), "unit": "USD"},
        ]

        # Stubs
        metrics["social_reach"] = [{"metric": "total_followers", "value": 0, "unit": "followers"}]
        metrics["capital"] = [{"metric": "total_aum", "value": 0, "unit": "USD"}]

        return metrics
    finally:
        conn.close()
|
||||
|
||||
|
||||
def record_snapshot(
    db_path: str,
    claim_index_url: str = "http://localhost:8080/claim-index",
):
    """Collect and persist one vitality snapshot.

    One row is written per agent, dimension and metric, plus the system
    aggregate under the agent name ``"_system"``. All rows share one UTC
    timestamp. A failure for one agent, or for the aggregate, is logged and
    does not stop the remaining rows from being written.

    Returns:
        ``{"rows_written", "agents", "recorded_at"}``; ``agents`` is
        ``len(ALL_AGENTS)``, not the number of agents that succeeded.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    rows = []

    # Per-agent rows; the source column names the dimension's collector.
    for agent in ALL_AGENTS:
        try:
            per_agent = collect_all_for_agent(db_path, agent, claim_index_url)
            for dim_name, dim_metrics in per_agent.items():
                source = f"{dim_name}_collector"
                for m in dim_metrics:
                    rows.append(
                        (agent, dim_name, m["metric"], m["value"], m["unit"], source, now)
                    )
        except Exception as e:
            logger.error("vitality collection failed for %s: %s", agent, e)

    # System-wide rows.
    try:
        system = collect_system_aggregate(db_path, claim_index_url)
        for dim_name, dim_metrics in system.items():
            for m in dim_metrics:
                rows.append(
                    ("_system", dim_name, m["metric"], m["value"],
                     m["unit"], "system_aggregate", now)
                )
    except Exception as e:
        logger.error("vitality system aggregate failed: %s", e)

    # Persist everything on a separate writable connection.
    ensure_schema(db_path)
    insert_sql = (
        "INSERT OR REPLACE INTO vitality_snapshots "
        "(agent_name, dimension, metric, value, unit, source, recorded_at) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)"
    )
    conn = sqlite3.connect(db_path, timeout=30)
    try:
        conn.executemany(insert_sql, rows)
        conn.commit()
        row_count = len(rows)
        agent_count = len(ALL_AGENTS)
        logger.info(
            "vitality snapshot recorded: %d rows for %d agents + system",
            row_count, agent_count,
        )
        return {"rows_written": row_count, "agents": agent_count, "recorded_at": now}
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI: python3 vitality.py [db_path] — runs a snapshot and prints the
    # summary as JSON. Without an argument the default pipeline DB is used.
    import sys

    logging.basicConfig(level=logging.INFO)
    cli_args = sys.argv[1:]
    db = cli_args[0] if cli_args else "/opt/teleo-eval/pipeline/pipeline.db"
    result = record_snapshot(db)
    print(json.dumps(result, indent=2))
|
||||
293
diagnostics/vitality_routes.py
Normal file
293
diagnostics/vitality_routes.py
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
"""Vitality API routes for Argus diagnostics dashboard.
|
||||
|
||||
Endpoints:
|
||||
GET /api/vitality — latest snapshot + time-series for all agents or one
|
||||
GET /api/vitality/snapshot?confirm=1 — trigger a new snapshot (POST-like via GET for cron curl; returns 400 without confirm=1)
|
||||
GET /api/vitality/leaderboard — agents ranked by composite vitality score
|
||||
|
||||
Owner: Argus
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
from aiohttp import web
|
||||
|
||||
from vitality import (
|
||||
ALL_AGENTS,
|
||||
MIGRATION_SQL,
|
||||
collect_all_for_agent,
|
||||
collect_system_aggregate,
|
||||
record_snapshot,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("argus.vitality")
|
||||
|
||||
# Composite vitality weights — Leo-approved 2026-04-08
|
||||
# Dimension keys match Ship's refactored vitality.py DIMENSION_MAP
|
||||
# Weight of each dimension in the composite score.
# NOTE(review): the non-zero weights sum to 0.90, so the composite tops out
# at 0.90 rather than 1.0 — confirm this is intended.
VITALITY_WEIGHTS = {
    # primary output — highest weight
    "knowledge_output": 0.30,
    # was "diversity" — quality of output
    "knowledge_quality": 0.20,
    # attracting external contributors
    "contributor_engagement": 0.15,
    # new dim, zero until review_records populated
    "review_performance": 0.00,
    # independent action
    "autonomy": 0.15,
    # machinery working
    "infrastructure_health": 0.05,
    # cost discipline
    "spend_efficiency": 0.05,
    # zero until accounts active
    "social_reach": 0.00,
    # zero until treasury exists
    "capital": 0.00,
    # zero until measurable
    "external_impact": 0.00,
}
|
||||
|
||||
# Public paths (no auth required)
|
||||
# NOTE(review): /api/vitality/snapshot triggers a database write yet is listed
# as public — confirm that is acceptable.
VITALITY_PUBLIC_PATHS = frozenset((
    "/api/vitality",
    "/api/vitality/snapshot",
    "/api/vitality/leaderboard",
))
|
||||
|
||||
|
||||
def _ro_conn(db_path: str) -> sqlite3.Connection:
    """Open ``db_path`` read-only and return rows addressable by column name.

    The path is percent-encoded before being placed in the ``file:`` URI.
    Unescaped, a ``?`` or ``#`` in the path would be parsed as the start of
    the query string or fragment and the wrong file would be opened.

    Raises:
        sqlite3.OperationalError: If the database cannot be opened read-only,
            for example when the file does not exist.
    """
    from urllib.parse import quote  # local import: only this helper needs it

    uri = f"file:{quote(str(db_path))}?mode=ro"
    conn = sqlite3.connect(uri, uri=True, timeout=30)
    conn.row_factory = sqlite3.Row
    return conn
|
||||
|
||||
|
||||
async def handle_vitality(request: web.Request) -> web.Response:
    """GET /api/vitality?agent=<name>&days=7

    Returns the latest snapshot and time-series data.
    If agent is specified, returns that agent only. Otherwise returns all
    agents plus ``_system``; the time series then covers ``_system``.

    ``days`` is clamped to 1..90; a non-integer value falls back to 7.
    """
    db_path = request.app["db_path"]
    agent = request.query.get("agent")
    try:
        days = int(request.query.get("days", "7"))
    except ValueError:
        days = 7
    # Zero or negative values would produce an empty or invalid window.
    days = max(1, min(days, 90))

    conn = _ro_conn(db_path)
    try:
        # Check if table exists
        table_check = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='vitality_snapshots'"
        ).fetchone()
        if not table_check:
            return web.json_response({
                # The snapshot endpoint rejects calls without confirm=1.
                "error": "No vitality data yet. Trigger a snapshot first via /api/vitality/snapshot?confirm=1",
                "has_data": False
            })

        # Latest snapshot timestamp
        latest = conn.execute(
            "SELECT MAX(recorded_at) as ts FROM vitality_snapshots"
        ).fetchone()
        latest_ts = latest["ts"] if latest else None

        if not latest_ts:
            return web.json_response({"has_data": False})

        # Latest snapshot data
        if agent:
            agents_filter = [agent]
        else:
            agents_filter = ALL_AGENTS + ["_system"]

        result = {"latest_snapshot": latest_ts, "agents": {}}

        for a in agents_filter:
            rows = conn.execute(
                "SELECT dimension, metric, value, unit FROM vitality_snapshots "
                "WHERE agent_name = ? AND recorded_at = ?",
                (a, latest_ts)
            ).fetchall()

            if not rows:
                continue

            dimensions = {}
            for r in rows:
                dimensions.setdefault(r["dimension"], []).append({
                    "metric": r["metric"],
                    "value": r["value"],
                    "unit": r["unit"],
                })
            result["agents"][a] = dimensions

        # Time-series for trend charts (one data point per snapshot).
        # recorded_at is written as "%Y-%m-%dT%H:%M:%SZ", so the cutoff is built
        # in the same format. datetime('now', ...) uses a space separator, which
        # sorts below 'T' and would admit every row from the cutoff day.
        ts_query_agent = agent if agent else "_system"
        ts_rows = conn.execute(
            "SELECT recorded_at, dimension, metric, value "
            "FROM vitality_snapshots "
            "WHERE agent_name = ? "
            "AND recorded_at > strftime('%Y-%m-%dT%H:%M:%SZ', 'now', ?) "
            "ORDER BY recorded_at",
            (ts_query_agent, f"-{days} days")
        ).fetchall()

        time_series = {}
        for r in ts_rows:
            key = f"{r['dimension']}.{r['metric']}"
            time_series.setdefault(key, []).append({
                "t": r["recorded_at"],
                "v": r["value"],
            })
        result["time_series"] = time_series
        result["has_data"] = True

        return web.json_response(result)
    finally:
        conn.close()
|
||||
|
||||
|
||||
async def handle_vitality_snapshot(request: web.Request) -> web.Response:
    """GET /api/vitality/snapshot — trigger a new snapshot collection.

    Used by cron: curl "http://localhost:8081/api/vitality/snapshot?confirm=1"
    Requires ?confirm=1 to prevent accidental triggers from crawlers/prefetch;
    without it the handler returns 400 and writes nothing.

    ``record_snapshot`` is synchronous, so it runs in the default executor to
    keep the event loop free for other requests while the snapshot is taken.
    """
    import asyncio  # local import: only this handler needs it

    if request.query.get("confirm") != "1":
        return web.json_response(
            {"status": "noop", "error": "Add ?confirm=1 to trigger a snapshot write"},
            status=400,
        )
    db_path = request.app["db_path"]
    claim_index_url = request.app.get("claim_index_url", "http://localhost:8080/claim-index")

    try:
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None, record_snapshot, db_path, claim_index_url
        )
        return web.json_response({"status": "ok", **result})
    except Exception as e:
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception("vitality snapshot failed: %s", e)
        return web.json_response({"status": "error", "error": str(e)}, status=500)
|
||||
|
||||
|
||||
def _metric_value(dims: dict, dimension: str, metric: str):
    """Return a stored metric as a number, or 0 when missing or non-numeric."""
    value = dims.get(dimension, {}).get(metric)
    return value if isinstance(value, (int, float)) else 0


def _score_dimensions(dims: dict) -> dict:
    """Normalize one agent's raw metrics into per-dimension scores in [0, 1].

    ``dims`` maps dimension -> {metric: value} as read from the snapshot table.
    """
    def clamp(score):
        return min(max(score, 0), 1.0)

    def metric(dimension, name):
        return _metric_value(dims, dimension, name)

    scores = {}

    # knowledge_output: claims_merged (cap at 100 = 1.0)
    scores["knowledge_output"] = clamp(metric("knowledge_output", "claims_merged") / 100)

    # knowledge_quality: challenge_rate + breadth + evidence_density + domain_coverage
    scores["knowledge_quality"] = clamp(
        metric("knowledge_quality", "challenge_rate") / 0.1 * 0.2
        + metric("knowledge_quality", "activity_breadth") / 4 * 0.2
        + metric("knowledge_quality", "evidence_density") * 0.3
        + metric("knowledge_quality", "domain_coverage") * 0.3
    )

    # contributor_engagement: unique_submitters (cap at 5 = 1.0)
    scores["contributor_engagement"] = clamp(
        metric("contributor_engagement", "unique_submitters") / 5
    )

    # review_performance: approval_rate from review_records (0 until populated)
    scores["review_performance"] = clamp(metric("review_performance", "approval_rate"))

    # autonomy: active_days_7d (7 = 1.0)
    scores["autonomy"] = clamp(metric("autonomy", "active_days_7d") / 7)

    # infrastructure_health: merge_rate_7d used directly
    scores["infrastructure_health"] = clamp(metric("infrastructure_health", "merge_rate_7d"))

    # spend_efficiency: inverted 24h response cost — $0 scores 1.0, $10+ scores 0
    scores["spend_efficiency"] = clamp(
        1.0 - metric("spend_efficiency", "response_cost_24h") / 10.0
    )

    # Social/Capital/External: stubbed at 0
    scores["social_reach"] = 0
    scores["capital"] = 0
    scores["external_impact"] = 0
    return scores


async def handle_vitality_leaderboard(request: web.Request) -> web.Response:
    """GET /api/vitality/leaderboard — agents ranked by composite vitality score.

    Scoring approach:
    - Each dimension gets a 0-1 normalized score based on the metric values
    - Weighted sum (``VITALITY_WEIGHTS``) produces the composite score
    - Agents are ranked by composite score, descending

    Only rows from the most recent snapshot timestamp are used; agents with
    no rows at that timestamp are left out. Missing or NULL metric values
    count as 0 instead of raising.
    """
    db_path = request.app["db_path"]
    conn = _ro_conn(db_path)
    try:
        table_check = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='vitality_snapshots'"
        ).fetchone()
        if not table_check:
            return web.json_response({"error": "No vitality data yet", "has_data": False})

        latest = conn.execute(
            "SELECT MAX(recorded_at) as ts FROM vitality_snapshots"
        ).fetchone()
        if not latest or not latest["ts"]:
            return web.json_response({"has_data": False})

        latest_ts = latest["ts"]

        # Collect all agents' latest data
        agent_scores = []
        for agent in ALL_AGENTS:
            rows = conn.execute(
                "SELECT dimension, metric, value FROM vitality_snapshots "
                "WHERE agent_name = ? AND recorded_at = ?",
                (agent, latest_ts)
            ).fetchall()
            if not rows:
                continue

            dims = {}
            for r in rows:
                dims.setdefault(r["dimension"], {})[r["metric"]] = r["value"]

            dim_scores = _score_dimensions(dims)

            # Composite weighted score
            composite = sum(
                dim_scores.get(dim, 0) * weight
                for dim, weight in VITALITY_WEIGHTS.items()
            )

            agent_scores.append({
                "agent": agent,
                "composite_score": round(composite, 4),
                "dimension_scores": {k: round(v, 4) for k, v in dim_scores.items()},
                "raw_highlights": {
                    "claims_merged": int(_metric_value(dims, "knowledge_output", "claims_merged")),
                    "merge_rate": round(
                        _metric_value(dims, "infrastructure_health", "merge_rate_7d") * 100, 1
                    ),
                    "active_days": int(_metric_value(dims, "autonomy", "active_days_7d")),
                    "challenge_rate": round(
                        _metric_value(dims, "knowledge_quality", "challenge_rate") * 100, 1
                    ),
                    "evidence_density": round(
                        _metric_value(dims, "knowledge_quality", "evidence_density") * 100, 1
                    ),
                },
            })

        # Sort by composite score descending
        agent_scores.sort(key=lambda x: x["composite_score"], reverse=True)

        return web.json_response({
            "has_data": True,
            "snapshot_at": latest_ts,
            "leaderboard": agent_scores,
        })
    finally:
        conn.close()
|
||||
|
||||
|
||||
def register_vitality_routes(app: web.Application):
    """Attach the three vitality GET endpoints to ``app``'s router."""
    routes = (
        ("/api/vitality", handle_vitality),
        ("/api/vitality/snapshot", handle_vitality_snapshot),
        ("/api/vitality/leaderboard", handle_vitality_leaderboard),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)
|
||||
62
docs/deploy-manifest.md
Normal file
62
docs/deploy-manifest.md
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
# Deploy Manifest
|
||||
|
||||
Every PR that touches VPS-deployed code must include a deploy manifest — either in the PR description or as a comment before requesting deploy. Rhea can reject deploys without one.
|
||||
|
||||
## Template
|
||||
|
||||
Copy this into your PR description and fill it in:
|
||||
|
||||
```
|
||||
## Deploy Manifest
|
||||
|
||||
**Files changed:**
|
||||
- path/to/file.py (new | modified | deleted)
|
||||
|
||||
**Services to restart:**
|
||||
- teleo-bot.service
|
||||
- teleo-eval.service
|
||||
|
||||
**New ReadWritePaths:** (leave blank if none)
|
||||
- /opt/teleo-eval/data/new-directory
|
||||
|
||||
**Migration steps:** (leave blank if none)
|
||||
- Run: sqlite3 pipeline.db < migrations/001-add-column.sql
|
||||
|
||||
**Endpoints affected:**
|
||||
- GET /health
|
||||
- GET /api/alerts
|
||||
|
||||
**Expected behavior after deploy:**
|
||||
- /health returns 200 with new field X
|
||||
- New cron runs every 5 minutes
|
||||
```
|
||||
|
||||
## What Counts as VPS-Deployed Code
|
||||
|
||||
| File type | Example | Needs manifest? |
|
||||
|-----------|---------|-----------------|
|
||||
| Python application code | bot.py, app.py, alerting.py | Yes |
|
||||
| Shell scripts on VPS | extract-cron.sh, evaluate-trigger.sh | Yes |
|
||||
| systemd service/timer files | teleo-bot.service | Yes |
|
||||
| Database migrations | ALTER TABLE, new tables | Yes |
|
||||
| HTML/CSS/JS served by app | dashboard.html, teleo-app | Yes |
|
||||
| Claim/source/entity markdown | domains/ai-alignment/claim.md | No |
|
||||
| Schema definitions | schemas/claim.md | No (but see schema-change-protocol.md) |
|
||||
| Agent identity/beliefs | agents/theseus/identity.md | No |
|
||||
|
||||
## Rules
|
||||
|
||||
1. **No deploy without manifest.** If the PR lacks one, Rhea bounces it back.
|
||||
2. **List every service that needs restart.** "Just restart everything" is not acceptable — it causes unnecessary downtime.
|
||||
3. **ReadWritePaths are mandatory when your code writes to a new path.** List every new path; leave the field blank only if there are none. Missing ReadWritePaths is the #1 cause of silent deploy failures.
|
||||
4. **Endpoints affected enables verification.** Argus uses this field to run post-deploy smoke tests. Without it, verification is guesswork.
|
||||
5. **Migration steps must be idempotent.** If the deploy is retried, the migration shouldn't break.
|
||||
|
||||
## Post-Deploy Verification
|
||||
|
||||
After Rhea restarts the service:
|
||||
1. Argus hits every endpoint listed in "Endpoints affected"
|
||||
2. Argus checks systemd journal for errors in the last 60 seconds
|
||||
3. Argus reports pass/fail in the Engineering group chat
|
||||
|
||||
If verification fails, Rhea rolls back. The PR author fixes and resubmits.
|
||||
192
docs/multi-model-eval-architecture.md
Normal file
192
docs/multi-model-eval-architecture.md
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
# Multi-Model Evaluation Architecture
|
||||
|
||||
Spec for adding a second-model evaluation pass to break correlated blind spots in claim review. Designed with Leo (primary evaluator). Implementation by Epimetheus.
|
||||
|
||||
## Problem
|
||||
|
||||
Kim et al. (ICML 2025): ~60% error agreement within same-model-family evaluations. Self-preference bias is linear with self-recognition. A single-model evaluator systematically misses the same class of errors every time. Human and LLM biases are complementary, not overlapping — multi-model evaluation captures this.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Evaluation Sequence
|
||||
|
||||
1. **Leo evaluates first.** Verdict + reasoning stored as structured record.
|
||||
2. **Second model evaluates independently** against the same rubric. Different model family required — GPT-4o via OpenRouter or Gemini. Never another Claude instance.
|
||||
3. **System surfaces disagreements only.** Agreements are noise; disagreements are signal.
|
||||
4. **Leo makes final call** on all disagreements.
|
||||
|
||||
Sequencing rationale: Leo sees the second model's assessment **after** his own eval, never before. Seeing it before anchors judgment. Seeing it after functions as a genuine blind-spot check.
|
||||
|
||||
### Second Model Selection
|
||||
|
||||
Requirements:
|
||||
- Different model family from the evaluating agent (currently Claude → use GPT-4o or Gemini)
|
||||
- Access via OpenRouter API (single integration point)
|
||||
- Must receive the same rubric and claim content as Leo
|
||||
- Must output structured verdict in the same format
|
||||
|
||||
### Disagreement Handling
|
||||
|
||||
A disagreement occurs when the two evaluators reach different verdicts on the same claim (accept vs reject, or different rejection categories).
|
||||
|
||||
Disagreements surface in a review queue Leo checks before finalizing. Each disagreement record includes:
|
||||
- Leo's verdict + reasoning
|
||||
- Second model's verdict + reasoning
|
||||
- The specific claim and PR context
|
||||
- Which evaluation criteria they diverge on
|
||||
|
||||
### Calibration Metrics
|
||||
|
||||
Track disagreement rate over time:
|
||||
- **Below ~10%:** System is working. Evaluators are calibrated.
|
||||
- **10-25%:** Normal operating range. Disagreements are productive signal.
|
||||
- **Above ~25%:** Either the rubric is ambiguous or one evaluator is drifting. Both are actionable — trigger rubric review.
|
||||
|
||||
Disagreement rate itself becomes the primary calibration metric for evaluation quality.
|
||||
|
||||
## Unified Rejection Record
|
||||
|
||||
Single format used by both CI gates and human evaluators. The feedback loop to agents consumes this format without caring about the source.
|
||||
|
||||
```json
|
||||
{
|
||||
"source": "ci | evaluator | second_model",
|
||||
"category": "schema_violation | wiki_link_broken | weak_evidence | scope_mismatch | factual_error | precision_failure | opsec_violation",
|
||||
"severity": "hard | soft",
|
||||
"agent_id": "<producer of the rejected content>",
|
||||
"pr": "<PR number>",
|
||||
"file": "<file path in PR>",
|
||||
"claim_path": "<claim file path if different from file>",
|
||||
"detail": "<free text explanation>",
|
||||
"timestamp": "<ISO 8601>"
|
||||
}
|
||||
```
|
||||
|
||||
Field notes:
|
||||
- `source`: `ci` for automated gates, `evaluator` for Leo, `second_model` for the disagreement-check model
|
||||
- `severity`: `hard` = merge blocker (schema_violation, wiki_link_broken), `soft` = reviewer judgment (weak_evidence, precision_failure). Hard rejections trigger immediate resubmission attempts. Soft rejections accumulate toward the 3-strikes upgrade threshold.
|
||||
- `claim_path` separate from `file` handles multi-file enrichment PRs where only one file has the issue
|
||||
- `category` taxonomy covers ~80% of rejection causes based on ~400 PR reviews
|
||||
|
||||
### Rejection Feedback Loop
|
||||
|
||||
1. Rejection records flow to the producing agent as structured feedback.
|
||||
2. Agent receives the category, severity, and detail.
|
||||
3. Hard rejections → agent attempts immediate fix and resubmission.
|
||||
4. Soft rejections → agent accumulates feedback. **After 3 rejections of the same category from the same agent**, the system triggers a skill upgrade proposal.
|
||||
5. Skill upgrade proposals route back to Leo for eval (see Agent Self-Upgrade Criteria below).
|
||||
|
||||
The 3-strikes rule prevents premature optimization while creating learning pressure. Learning from rejection is the agent's job — the system just tracks the pattern.
|
||||
|
||||
## Automatable CI Rules
|
||||
|
||||
Five rules that catch ~80% of current rejections. Rules 1-2 are hard gates (block merge). Rules 3-5 are soft flags (surface to reviewer).
|
||||
|
||||
### Hard Gates
|
||||
|
||||
**1. YAML Schema Validation**
|
||||
- `type` field exists and equals `claim`
|
||||
- All required frontmatter fields present: type, domain, description, confidence, source, created
|
||||
- Domain value is one of the 14 valid domains
|
||||
- Confidence value is one of: proven, likely, experimental, speculative
|
||||
- Date format is valid ISO 8601
|
||||
- Pure syntax check — zero judgment needed
|
||||
|
||||
**2. Wiki Link Resolution**
|
||||
- Every `[[link]]` in the body must resolve to an existing file at merge time
|
||||
- Includes links in the `Relevant Notes` section
|
||||
- Already policy, not yet enforced in CI
|
||||
|
||||
### Soft Flags
|
||||
|
||||
**3. Domain Validation**
|
||||
- File path domain matches one of the 14 valid domains
|
||||
- Claim content plausibly belongs in that domain
|
||||
- Path check is automatable; content check needs light NLP or embedding similarity against domain centroids
|
||||
- Flag for reviewer if domain assignment seems wrong
|
||||
|
||||
**4. OPSEC Scan**
|
||||
- Regex for dollar amounts, percentage allocations, fund sizes, deal terms
|
||||
- Flag for human review, never auto-reject (false positive risk on dollar-sign patterns in technical content)
|
||||
- Standing directive from Cory: strict enforcement, but false positives on technical content create friction
|
||||
|
||||
**5. Duplicate Detection**
|
||||
- Embedding similarity against existing claims in the same domain using Qdrant (text-embedding-3-small, 1536d)
|
||||
- **Threshold: 0.92 universal** — not per-domain tuning
|
||||
- Flag includes **top-3 similar claims with scores** so the reviewer can judge in context
|
||||
- The threshold is the attention trigger; reviewer judgment is the decision
|
||||
- If a domain consistently generates >50% false positive flags, tune that domain's threshold as a targeted fix (data-driven, not preemptive)
|
||||
|
||||
Domain maps, topic indices, and non-claim type files are hard-filtered from duplicate detection — they're navigation aids, not claims.
|
||||
|
||||
## Agent Self-Upgrade Criteria
|
||||
|
||||
When agents propose changes to their own skills, tools, or extraction quality, these criteria apply in priority order:
|
||||
|
||||
1. **Scope compliance** — Does the upgrade stay within the agent's authorized domain? Extraction agent improving YAML parsing: yes. Same agent adding merge capability: no.
|
||||
2. **Measurable improvement** — Before/after on a concrete metric. Minimum: 3 test cases showing improvement with 0 regressions. No "this feels better."
|
||||
3. **Schema compliance preserved** — Upgrade cannot break existing quality gates. Full validation suite runs against output produced by the new skill.
|
||||
4. **Reversibility** — Every skill change must be revertable. If not, the evidence bar goes up significantly.
|
||||
5. **No scope creep** — The upgrade does what it claims, nothing more. Watch for "while I was in there I also..." additions.
|
||||
|
||||
Evidence bar difference: a **claim** needs sourced evidence. A **skill change** needs **demonstrated performance delta** — show the before, show the after, on real data not synthetic examples.
|
||||
|
||||
For skill changes that affect other agents' outputs (e.g., shared extraction templates), the evidence bar requires testing against multiple agents' typical inputs, not just the proposing agent's.
|
||||
|
||||
## Retrieval Quality (Two-Pass System)
|
||||
|
||||
Design parameters calibrated against Leo's ground-truth rankings on 3 real query scenarios.
|
||||
|
||||
### Two-Pass Architecture
|
||||
|
||||
- **Pass 1:** Top 5 claims, similarity-descending sort
|
||||
- **Pass 2 (expand):** Top 10 claims, triggered when pass 1 is insufficient
|
||||
|
||||
### Calibration Findings
|
||||
|
||||
1. **5 first-pass claims is viable for all tested scenarios** — but only if the 5 are well-chosen. Similarity ranking alone won't produce optimal results.
|
||||
|
||||
2. **Counter-evidence must be explicitly surfaced.** Similarity-descending sort systematically buries opposing-valence claims. Counter-claims are semantically adjacent but have opposite valence. Design: after first pass, check if all returned claims share directional agreement. If yes, force-include the highest-similarity opposing claim.
|
||||
|
||||
3. **Synthesis claims suppress their source claims.** If a synthesis claim is in the result set, its individual source claims are filtered out to prevent slot waste. Implementation: tag synthesis claims with source list in frontmatter, filter at retrieval time. **Bidirectional:** if a source claim scores higher than its synthesis parent, keep the source and consider suppressing the synthesis (user query more specific than synthesis scope).
|
||||
|
||||
4. **Cross-domain claims earn inclusion only when causally load-bearing.** Astra's power infrastructure claims earn a spot in compute governance queries because power constraints cause the governance window. Rio's blockchain claims don't because they're a parallel domain, not a causal input.
|
||||
|
||||
5. **Domain maps and topic indices hard-filtered from retrieval results.** Non-claim types (`type: "map"`, indices) should be the first filter in the pipeline, before similarity ranking runs.
|
||||
|
||||
### Valence Tagging
|
||||
|
||||
Tag claims with `supports` / `challenges` / `neutral` relative to query thesis at ingestion time. Lightweight, one-time cost per claim. Enables the counter-evidence surfacing logic without runtime sentiment analysis.
|
||||
|
||||
## Verifier Divergence Implications
|
||||
|
||||
From NLAH paper (Pan et al.): verification layers can optimize for locally checkable properties that diverge from actual acceptance criteria (e.g., verifier reports "solved" while benchmark fails). Implication for multi-model eval: the second-model eval pass must check against the **same rubric** as Leo, not construct its own notion of quality. Shared rubric enforcement is a hard requirement.
|
||||
|
||||
## Implementation Sequence
|
||||
|
||||
1. **Automatable CI rules** (hard gates first) — YAML schema validation + wiki link resolution. Foundation for everything else. References: PR #2074 (schema change protocol v2) defines the authoritative schema surface.
|
||||
2. **Automatable CI rules** (soft flags) — domain validation, OPSEC scan, duplicate detection via Qdrant.
|
||||
3. **Unified rejection record** — data structure for both CI and human rejections, stored in pipeline.db.
|
||||
4. **Rejection feedback loop** — structured feedback to agents with 3-strikes accumulation.
|
||||
5. **Multi-model eval integration** — OpenRouter connection, rubric sharing, disagreement queue.
|
||||
6. **Self-upgrade eval criteria** — codified in eval workflow, triggered by 3-strikes pattern.
|
||||
|
||||
## Evaluator Self-Review Prevention
|
||||
|
||||
When Leo proposes claims (cross-domain synthesis, foundations-level):
|
||||
- Leo cannot be the evaluator on his own proposals
|
||||
- Minimum 2 domain agent reviews required
|
||||
- Every domain touched must have a reviewer from that domain
|
||||
- The second-model eval pass still runs (provides the external check)
|
||||
- Cory has veto (rollback) authority as final backstop
|
||||
|
||||
This closes the obvious gap: without it, the spec would define the integrity layer but not protect against the integrity layer's own blind spots. The constraint enforcement principle must apply to the constrainer too.
|
||||
|
||||
## Design Principle
|
||||
|
||||
The constraint enforcement layer must be **outside** the agent being constrained. That's why multi-model eval matters, why Leo shouldn't eval his own proposals, and why policy-as-code runs in CI, not in the agent's own process. As agents get more capable, the integrity layer gets more important, not less.
|
||||
|
||||
---
|
||||
|
||||
*Authored by Theseus. Reviewed by Leo (proposals integrated). Implementation: Epimetheus.*
|
||||
*Created: 2026-03-31*
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# Personality layer may need separation from knowledge base
|
||||
|
||||
**Date:** 2026-03-05
|
||||
**Status:** noted
|
||||
|
||||
## The Seam
|
||||
|
||||
`core/collective-agent-core.md` and the Personality sections in `agents/{name}/identity.md` are oriented toward the **product experience** — how the agent talks to users, what voice it has, what it says when challenged.
|
||||
|
||||
The rest of teleo-codex is oriented toward the **operational loop** — how agents propose/evaluate claims, the schema structure, the PR workflow.
|
||||
|
||||
Right now both coexist in the same repo. Fine for v1 where Pentagon agents do both jobs (interact AND maintain the knowledge base).
|
||||
|
||||
## When This Becomes a Problem
|
||||
|
||||
When the product separates the chat interface from the knowledge maintenance:
|
||||
- The **product prompt** loads personality + searches the knowledge base at runtime
|
||||
- The **operational agent** runs the extraction/evaluation loop against the repo
|
||||
- These are different contexts with different performance requirements
|
||||
|
||||
At that point, personality documents should live closer to the product (loaded into system prompt), and the knowledge base should be searched (RAG), not loaded wholesale.
|
||||
|
||||
## Not Blocking
|
||||
|
||||
v1 works fine with both in one repo. Flag this when building the product API layer or when the knowledge base grows large enough that loading it all into context is impractical.
|
||||
32
docs/queue.md
Normal file
32
docs/queue.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# Ops Queue
|
||||
|
||||
Outstanding work items visible to all agents. Everything here goes through eval — adding items, claiming them, closing them. Git history is the audit trail.
|
||||
|
||||
## How it works
|
||||
|
||||
1. **Add items** — any agent can propose new items via PR
|
||||
2. **Claim items** — move status to `claimed` with your name, via PR
|
||||
3. **Close items** — remove the row and note what PR resolved it, via PR
|
||||
4. **Priority** — critical items block other work; high items should be next; medium/low are opportunistic
|
||||
|
||||
## Active
|
||||
|
||||
| Item | Type | Priority | Claimed | Notes |
|
||||
|------|------|----------|---------|-------|
|
||||
| Rename `ai-alignment` domain → `ai-systems` | rename | high | — | Directory, CLAUDE.md, webhook.py domain routing, claim frontmatter, domain map. Support both names during transition. |
|
||||
| 24 claims with inflated confidence levels | audit | high | — | Foundations audit finding. 24 claims rated higher than evidence supports. List in `maps/analytical-toolkit.md` audit section. |
|
||||
| 8 foundation gaps (mechanism design, platform economics, transaction costs, info aggregation, auction theory, community formation, selfplex, CAS) | content | high | — | Partial coverage exists for some. See `maps/analytical-toolkit.md`. |
|
||||
| Update `skills/evaluate.md` with tiered eval architecture | docs | high | — | Document triage criteria, tier definitions, model routing. After Ganymede validates parallel eval pipeline. |
|
||||
| Update `collective-agent-core.md` — lever vs purpose framework + 20% posting rule | content | medium | — | From Cory voicenotes. Lever = the mechanism an agent uses. Purpose = why it exists. 20% of posting should be original synthesis. |
|
||||
| Identity reframe PRs need merging | review | medium | — | #149 Theseus, #153 Astra, #157 Rio, #158 Leo (needs rebase), #159 Vida. All have eval reviews. |
|
||||
| 16 processed sources missing domain field | fix | low | — | Fixed for internet-finance batch (PR #171). Audit remaining sources. |
|
||||
| Theseus disconfirmation protocol PR | content | medium | — | Scoped during B1 exercise. Theseus to propose. |
|
||||
| Research Hermes Agent by Nous Research — deep dive for KB extraction | research | high | Theseus | Source: NousResearch/hermes-agent (GitHub). Research brief in `agents/theseus/musings/research-hermes-agent-nous.md`. **Extract:** (1) Skill extraction as convergent learning mechanism. (2) Self-evolution + human review gates = our governance model. (3) 3+ layer memory convergence. (4) Individual self-improvement ≠ collective knowledge accumulation. (5) Enrich Agentic Taylorism — skills = Taylor's instruction cards. Domains: ai-alignment + collective-intelligence. |
|
||||
|
||||
## Rules
|
||||
|
||||
- **One row per item.** If an item is too big, split it into smaller items.
|
||||
- **Don't hoard claims.** If you claimed something and can't get to it within 2 sessions, unclaim it.
|
||||
- **Close promptly.** Remove the row in the PR that resolves the item, or in the next PR after it merges.
|
||||
- **No duplicates.** Check before adding. If an item is already tracked, update the existing row.
|
||||
- **Critical items first.** If a critical item exists, it takes precedence over all other work.
|
||||
127
docs/schema-change-protocol.md
Normal file
127
docs/schema-change-protocol.md
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Schema Change Protocol
|
||||
|
||||
When any agent changes a file format, database table, API response shape, or service configuration that other agents read or consume, those agents need to know before their next session. This protocol prevents silent breakage.
|
||||
|
||||
## The Rule
|
||||
|
||||
**Any PR that changes a schema must:**
|
||||
|
||||
1. **Update the schema spec** in `schemas/` (for file formats) or document the change in the PR (for DB tables, API responses, service configs)
|
||||
2. **Tag all consumers** — list which agents and scripts read this format (see map below)
|
||||
3. **Include a migration note** — what happens to existing data? (backfill on edit, ignore old files, or batch migration)
|
||||
4. **State backward compatibility** — can old-format data still be parsed? If not, the PR must include the migration
|
||||
|
||||
## What Counts as a Schema Change
|
||||
|
||||
| Change Type | Example | Requires Protocol? |
|
||||
|---|---|---|
|
||||
| New required field | Adding `attribution` block to claims | Yes |
|
||||
| New optional field | Adding `tags[]` to sources | Yes (consumers may need to handle it) |
|
||||
| Field rename | `source_type` to `format` | Yes |
|
||||
| Enum value added | New confidence level | Yes |
|
||||
| Enum value removed | Dropping a domain name | Yes — migration required |
|
||||
| Field type change | `source` from string to object | Yes — breaking change |
|
||||
| Body format change | New required section in claim body | Yes |
|
||||
| Pipeline parsing change | Regex update in `extract-graph-data.py` | Yes |
|
||||
| DB column add/rename/drop | Adding column to `prs` table | Yes |
|
||||
| DB table create/drop | New `response_audit` table | Yes |
|
||||
| API response shape change | Adding field to `/api/alerts` JSON | Yes |
|
||||
| systemd service config | New `ReadWritePaths` or port change | Yes |
|
||||
|
||||
**Not a schema change:** Adding a new claim, entity, or source file that follows the existing format. Normal PR workflow applies.
|
||||
|
||||
## Producer/Consumer Map
|
||||
|
||||
### File Formats
|
||||
|
||||
| Format | Schema | Producers | Consumers | Pipeline |
|
||||
|---|---|---|---|---|
|
||||
| Claim | `schemas/claim.md` | All proposers (Rio, Clay, Theseus, Vida, Astra) | Leo (eval), all agents (beliefs), visitors | `extract-graph-data.py` |
|
||||
| Source | `schemas/source.md` | All proposers, Epimetheus (pipeline) | Proposers (extraction), Epimetheus (pipeline) | `extract-cron.sh` |
|
||||
| Entity | `schemas/entity.md` | Domain agents | All agents (references), visitors | `extract-graph-data.py` |
|
||||
| Belief | `schemas/belief.md` | Each agent (own file) | Leo (review), other agents (cross-ref) | None currently |
|
||||
| Position | `schemas/position.md` | Each agent (own file) | Leo (review), visitors | None currently |
|
||||
| Conviction | `schemas/conviction.md` | Cory only | All agents, visitors | `extract-graph-data.py` |
|
||||
| Challenge | `schemas/challenge.md` | Any agent, any contributor | Leo (review), target claim author, visitors | `extract-graph-data.py` |
|
||||
| Divergence | `schemas/divergence.md` | Any agent | All agents, visitors | None currently |
|
||||
| Musing | `schemas/musing.md` | Each agent (own folder) | That agent only | None |
|
||||
| Sector | `schemas/sector.md` | Domain agents | All agents, visitors | None currently |
|
||||
| Contribution weights | `schemas/contribution-weights.yaml` | Cory / Leo | `contributors.json` build | Build script |
|
||||
| Graph data | (derived) | `extract-graph-data.py` | Oberon (frontend), system prompts | Auto-generated |
|
||||
|
||||
### Database Tables (pipeline.db)
|
||||
|
||||
| Table | Producer | Consumers | Notes |
|
||||
|---|---|---|---|
|
||||
| `prs` | Epimetheus (pipeline) | Argus (dashboard), Epimetheus (stale PR detection) | PR tracking, extraction status |
|
||||
| `audit_log` | Epimetheus (pipeline) | Argus (diagnostics) | 5 cols: id/timestamp/stage/event/detail |
|
||||
| `response_audit` | bot.py (runtime) | Argus (dashboard), Oberon (frontend) | Query-response audit trail |
|
||||
| `sources` | Epimetheus (extraction) | Epimetheus (dedup), Argus (metrics) | Source queue and processing status |
|
||||
|
||||
### API Response Shapes
|
||||
|
||||
| Endpoint | Producer | Consumers | Notes |
|
||||
|---|---|---|---|
|
||||
| `/health` | Argus | All agents, monitoring | Service health check |
|
||||
| `/api/alerts` | Argus | Oberon (frontend) | Active alert list |
|
||||
| `/api/activity` | Argus | Oberon (frontend) | Recent pipeline activity |
|
||||
| `/api/failure-report/{agent}` | Argus | Oberon (frontend), agents | Per-agent failure breakdown |
|
||||
| `graph-data.json` | `extract-graph-data.py` | Oberon (frontend) | Knowledge graph visualization data |
|
||||
|
||||
### Service Configuration
|
||||
|
||||
| Config | Owner | Dependents | Notes |
|
||||
|---|---|---|---|
|
||||
| `teleo-pipeline.service` | Rhea | Epimetheus, Argus | ReadWritePaths, ExecStart, ports |
|
||||
| `teleo-diagnostics.service` | Rhea | Argus, Oberon | ReadWritePaths, ports |
|
||||
| `teleo-bot.service` | Rhea | Epimetheus | ReadWritePaths for pipeline.db |
|
||||
|
||||
## How to Tag Consumers
|
||||
|
||||
In the PR body, add a section:
|
||||
|
||||
```
|
||||
## Schema Change
|
||||
|
||||
**Format affected:** claim
|
||||
**Change:** added optional `attribution` block
|
||||
**Backward compatible:** yes — old claims without attribution still parse
|
||||
**Migration:** backfill on next edit (no batch migration needed)
|
||||
**Consumers to notify:** Leo, Rio, Clay, Theseus, Vida, Astra, extract-graph-data.py
|
||||
```
|
||||
|
||||
If the change affects `extract-graph-data.py` or any other pipeline script, the PR must update that script too — don't merge a schema change that breaks the build.
|
||||
|
||||
## Backward Compatibility Rules
|
||||
|
||||
1. **New optional fields** — always backward compatible. Add to schema spec, document default behavior when absent. No migration needed.
|
||||
2. **New required fields** — must include migration. Either batch-update all existing files in the same PR, or make the field optional first and required later after backfill.
|
||||
3. **Field renames** — keep old name as accepted alias in pipeline scripts. Document deprecation. Remove old name only after all files are updated.
|
||||
4. **Enum additions** — backward compatible. Add to schema spec.
|
||||
5. **Enum removals** — breaking. Must migrate all files using the removed value in the same PR.
|
||||
6. **Type changes** — breaking. Must migrate all affected files in the same PR.
|
||||
7. **DB column renames** — treat as breaking. Update all queries in the same PR or add column alias.
|
||||
8. **API response shape changes** — adding fields is backward compatible; removing or renaming fields is breaking.
|
||||
|
||||
## Legacy Aliases (Currently Active)
|
||||
|
||||
These old field names are still accepted by the pipeline. Don't use them in new files, but don't break them in existing files either:
|
||||
|
||||
| Old Name | Current Name | Format |
|
||||
|---|---|---|
|
||||
| `evidence` | `source` | source.md |
|
||||
| `archive` | (removed) | source.md |
|
||||
| `source_type` | `format` | source.md |
|
||||
| `date_published` | `date` | source.md |
|
||||
|
||||
Epimetheus — confirm these are still honored in extraction code. If any are dead, remove from this list.
|
||||
|
||||
## Version Tracking
|
||||
|
||||
No formal version numbers. Schema changes are tracked by:
|
||||
- The PR that made the change (searchable in git history)
|
||||
- The updated schema spec in `schemas/` (for file formats)
|
||||
- The PR description schema change section (for DB/API changes)
|
||||
- The commit message, which should reference the schema change explicitly
|
||||
|
||||
If the system grows to need formal versioning, add a `schema_version` field to frontmatter. Not needed at current scale (~500 claims, 6 agents).
|
||||
169
docs/self-directed-research.md
Normal file
169
docs/self-directed-research.md
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
# Self-Directed Research Architecture
|
||||
|
||||
Draft — Leo, 2026-03-10
|
||||
|
||||
## Core Idea
|
||||
|
||||
Each agent gets a daily research session on the VPS. They autonomously pull tweets from their domain accounts, decide what's interesting, archive sources with notes, and push to inbox. A separate extraction cron (already running) picks up the archives and makes claims. The researcher never sees the extraction — preventing motivated reasoning.
|
||||
|
||||
## Why Separate Researcher and Extractor
|
||||
|
||||
When the same agent researches and extracts, they prime themselves. The researcher finds a tweet they think supports a thesis → writes notes emphasizing that angle → extracts a claim that confirms the thesis. The extraction becomes a formality.
|
||||
|
||||
Separation breaks this:
|
||||
- **Researcher** writes: "This tweet is about X, connects to Y, might challenge Z"
|
||||
- **Extractor** (different Claude instance, fresh context) reads the source and notes, extracts what's actually there
|
||||
- Neither has the other's context window or priming
|
||||
|
||||
This mirrors our proposer-evaluator separation for claims, applied one layer earlier in the pipeline.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Three cron stages on VPS
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ Research Cron │────▶│ Extract Cron │────▶│ Eval Pipeline │
|
||||
│ (daily, 2hr) │ │ (every 5 min) │ │ (webhook.py) │
|
||||
│ │ │ │ │ │
|
||||
│ Pull tweets │ │ Read archives │ │ Review claims │
|
||||
│ Pick 1 task │ │ Extract claims │ │ Approve/reject │
|
||||
│ Archive sources │ │ Open PR │ │ Merge │
|
||||
│ Push branch+PR │ │ │ │ │
|
||||
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
### Research Cron: `research-session.sh`
|
||||
|
||||
**Schedule:** Once daily, staggered across agents to respect rate limits
|
||||
|
||||
```
|
||||
# Stagger: each agent gets a 90-min window, overnight PST (10pm-7am)
|
||||
0 22 * * * /opt/teleo-eval/research-session.sh rio
|
||||
30 23 * * * /opt/teleo-eval/research-session.sh clay
|
||||
0 1 * * * /opt/teleo-eval/research-session.sh theseus
|
||||
30 2 * * * /opt/teleo-eval/research-session.sh vida
|
||||
0 4 * * * /opt/teleo-eval/research-session.sh astra
|
||||
30 5 * * * /opt/teleo-eval/research-session.sh leo
|
||||
```
|
||||
|
||||
**Per agent, the research session (~90 min):**
|
||||
|
||||
1. Pull latest tweets from agent's network accounts (X API)
|
||||
2. Read the agent's beliefs, recent claims, open positions
|
||||
3. Claude prompt: "You are {agent}. Here are the latest tweets from your network accounts ({accounts}). Here is your current knowledge state. Pick ONE research direction that advances your domain understanding. Archive the most relevant sources with notes."
|
||||
4. Agent writes source archives to `inbox/archive/` with `status: unprocessed`
|
||||
5. Commit, push to branch, open PR (source-only, no claims)
|
||||
6. Extract cron picks them up within 5 minutes
|
||||
|
||||
**Key constraint:** One Claude session per agent, ~90 minutes, Sonnet model. Total daily VPS research compute: ~9 hours of sequential Sonnet sessions (staggered overnight).
|
||||
|
||||
### Research Prompt Structure
|
||||
|
||||
```
|
||||
You are {agent}, a Teleo knowledge base agent specializing in {domain}.
|
||||
|
||||
## Your Current State
|
||||
{Read from agents/{agent}/beliefs.md, reasoning.md, positions/}
|
||||
|
||||
## Your Network
|
||||
{Read from network file — accounts to monitor}
|
||||
|
||||
## Recent Tweets
|
||||
{Raw tweet data pulled from X API}
|
||||
|
||||
## Your Task
|
||||
1. Scan these tweets for anything substantive — new claims, evidence,
|
||||
debates, data, counterarguments to existing KB positions
|
||||
2. Pick ONE research direction that would most advance your domain
|
||||
understanding right now. Consider:
|
||||
- Gaps in your beliefs that need evidence
|
||||
- Claims in the KB that might be wrong
|
||||
- Cross-domain connections you've been flagged about
|
||||
- New developments that change the landscape
|
||||
3. Archive the relevant sources (5-15 per session) following the
|
||||
inbox/archive format with full agent notes
|
||||
4. Write a brief research summary explaining what you found and why
|
||||
it matters
|
||||
|
||||
## Rules
|
||||
- Archive EVERYTHING substantive, not just what supports your views
|
||||
- Write honest agent notes — flag what challenges your beliefs too
|
||||
- Set all sources to status: unprocessed (a different instance extracts)
|
||||
- Flag cross-domain sources for other agents
|
||||
- Do NOT extract claims yourself — that's a separate process
|
||||
```
|
||||
|
||||
### Capacity on Claude Max ($200/month)
|
||||
|
||||
**VPS compute budget (all Sonnet):**
|
||||
- Research cron: 6 agents × 90 min/day = 9 hr/day (overnight)
|
||||
- Extract cron: ~37 sources × 10 min = 6 hr one-time backlog, then ~1 hr/day steady-state
|
||||
- Eval pipeline: ~10 PRs/day × 15 min = 2.5 hr/day
|
||||
- **Total VPS:** ~12.5 hr/day Sonnet (steady state: 9 hr research + ~1 hr extraction + 2.5 hr eval)
|
||||
|
||||
**Laptop compute budget (Opus + Sonnet mix):**
|
||||
- Agent sessions: 2-3 concurrent, ~4-6 hr/day
|
||||
- Leo coordination: ~1-2 hr/day
|
||||
|
||||
**Single subscription feasibility:** Tight but workable if:
|
||||
- VPS runs overnight (10pm-7am staggered research + continuous extraction)
|
||||
- Laptop agents run during the day
|
||||
- Never more than 2-3 concurrent sessions total
|
||||
- VPS uses Sonnet exclusively (cheaper rate limits)
|
||||
|
||||
**Risk:** If rate limits tighten or daily message caps exist, the VPS research cron may not complete all 6 agents. Mitigation: priority ordering (run the 3 most active agents daily, others every 2-3 days).
|
||||
|
||||
## Contributor Workflow Options
|
||||
|
||||
Different people want different levels of involvement:
|
||||
|
||||
### Mode 1: Full Researcher
|
||||
"I found this, here's why it matters, here are the KB connections"
|
||||
- Uses /ingest on laptop (Track A or B)
|
||||
- Writes detailed agent notes
|
||||
- May extract claims themselves
|
||||
- Highest quality input
|
||||
|
||||
### Mode 2: Curator
|
||||
"Here's a source, it's about X domain"
|
||||
- Minimal archive file with domain tag and brief notes
|
||||
- VPS extracts (Track B)
|
||||
- Good enough for most sources
|
||||
|
||||
### Mode 3: Raw Dump
|
||||
"Here are tweets, figure it out"
|
||||
- Dumps raw JSON to VPS inbox-raw/
|
||||
- Leo triages: decides domain, writes archive files
|
||||
- VPS extracts from Leo's archives
|
||||
- Lowest effort, decent quality (Leo's triage catches the important stuff)
|
||||
|
||||
### Mode 4: Self-Directed Agent (VPS)
|
||||
"Agent, go research your domain"
|
||||
- No human involvement beyond initial network setup
|
||||
- Daily cron pulls tweets, agent picks direction, archives, extraction follows
|
||||
- Quality depends on prompt engineering + eval pipeline catching errors
|
||||
|
||||
All four modes feed into the same extraction → eval pipeline. Quality varies, but the eval pipeline is the quality gate regardless.
|
||||
|
||||
## Open Questions
|
||||
|
||||
1. **Rate limits**: What are the actual Claude Max per-minute and per-day limits for headless Sonnet sessions? Need empirical data from this first extraction run.
|
||||
|
||||
2. **Research quality**: Will a 90-minute Sonnet session produce good enough research notes? Or does research require Opus-level reasoning?
|
||||
|
||||
3. **Network bootstrapping**: Agents need network files. Who curates the initial account lists? (Currently Cory + Leo, eventually agents propose additions)
|
||||
|
||||
4. **Cross-domain routing**: When the research cron finds cross-domain content, should it archive under the researcher's domain or the correct domain? (Probably correct domain with flagged_for_{researcher})
|
||||
|
||||
5. **Feedback loop**: How does extraction quality feed back to improve research notes? If the extractor consistently ignores certain types of notes, the researcher should learn.
|
||||
|
||||
6. **Deduplication across agents**: Multiple agents may archive the same tweet (e.g., a Karpathy tweet relevant to both AI systems and collective intelligence). The extract cron needs to detect this.
|
||||
|
||||
## Implementation Order
|
||||
|
||||
1. ✅ Extract cron (running now — validating extraction quality)
|
||||
2. **Next**: Research cron — daily self-directed sessions per agent
|
||||
3. **Then**: Raw dump path — Leo triage from JSON → archive
|
||||
4. **Later**: Full end-to-end with X API pull integrated into research cron
|
||||
5. **Eventually**: Feedback loops from eval quality → research prompt tuning
|
||||
255
docs/tool-registry-spec.md
Normal file
255
docs/tool-registry-spec.md
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
# Tool Registry Architecture Spec
|
||||
|
||||
**Status:** Approved (Epimetheus review 2026-03-31)
|
||||
**Author:** Ganymede
|
||||
**Date:** 2026-03-31
|
||||
|
||||
## Problem
|
||||
|
||||
Bot.py has four hardcoded tool paths: LEARNING, RESEARCH, SOURCE, CLAIM. Each is a bespoke code path — tag regex in `response.py`, handler function in `bot.py`, side effects scattered across archival, X search, and file creation. Adding a new tool means modifying the LLM prompt, adding a regex, writing a handler, and wiring the audit trail. No gating — every tool fires immediately on tag match.
|
||||
|
||||
## Design
|
||||
|
||||
### Registry Interface
|
||||
|
||||
```python
|
||||
# lib/tool_registry.py
|
||||
|
||||
@dataclass
|
||||
class ToolDef:
|
||||
"""A registered tool that the LLM can invoke via response tags."""
|
||||
name: str # "research", "source", "claim", "learning"
|
||||
description: str # Human-readable, included in LLM prompt
|
||||
tag_prefix: str # "RESEARCH" — literal tag name for parser
|
||||
arg_pattern: str = r"(.+)" # Regex for argument after "TAG: "
|
||||
arg_groups: list[str] = field(default_factory=lambda: ["raw_arg"])
|
||||
prompt_example: str = "" # "RESEARCH: [search query]" — for LLM prompt
|
||||
handler: Callable # async fn(context: ToolContext) -> ToolResult
|
||||
cost: str # "free", "cheap", "expensive" — for eval gating
|
||||
requires_gate: bool # If True, eval pipeline can approve/block
|
||||
strip_from_display: bool = True # Strip tag from user-visible response
|
||||
cooldown_seconds: int = 0 # Per-user cooldown (0 = none)
|
||||
daily_limit: int = 0 # Per-user daily cap (0 = unlimited)
|
||||
|
||||
@dataclass
|
||||
class ToolContext:
|
||||
"""Input to a tool handler."""
|
||||
raw_arg: str # The text after the tag (e.g., search query)
|
||||
user_message: str # Original user message that triggered the response
|
||||
user: str # @username
|
||||
chat_id: int
|
||||
kb_context: str | None # KB context available at response time
|
||||
confidence: float | None # LLM's self-rated confidence
|
||||
|
||||
@dataclass
|
||||
class ToolResult:
|
||||
"""Output from a tool handler."""
|
||||
success: bool
|
||||
message: str | None # Follow-up message to send (None = silent)
|
||||
side_effects: list[str] # ["created:inbox/queue/source.md", "searched:x:query"]
|
||||
audit: dict # Arbitrary data for response_audit.tool_calls
|
||||
|
||||
class ToolRegistry:
|
||||
"""Central registry. Tools register once, available to all agents."""
|
||||
|
||||
def register(self, tool: ToolDef) -> None:
|
||||
"""Register a tool. Raises if name collision."""
|
||||
|
||||
def get(self, name: str) -> ToolDef | None:
|
||||
"""Look up a tool by name."""
|
||||
|
||||
def all_tools(self) -> list[ToolDef]:
|
||||
"""All registered tools, sorted by name."""
|
||||
|
||||
def prompt_block(self) -> str:
|
||||
"""Generate the LLM prompt section describing available tools.
|
||||
Built from registered tool descriptions + tag formats."""
|
||||
|
||||
async def execute(self, name: str, ctx: ToolContext) -> ToolResult:
|
||||
"""Execute a tool. Applies cooldown/limit checks, eval gate, then handler.
|
||||
Registry owns timing — stamps duration_ms, tool name, and timestamp on
|
||||
result.audit automatically. Handlers never touch timing.
|
||||
Raises ToolRateLimited or ToolNotFound on failure."""
|
||||
# Timing is owned here, not by handlers:
|
||||
# start = time.monotonic()
|
||||
# result = await tool.handler(ctx)
|
||||
# result.audit["duration_ms"] = int((time.monotonic() - start) * 1000)
|
||||
# result.audit["tool"] = name
|
||||
# result.audit["ts"] = datetime.now(UTC).isoformat()
|
||||
```
|
||||
|
||||
### Registration
|
||||
|
||||
Tools register at bot startup. No dynamic registration at runtime — the set of available tools is fixed per deploy.
|
||||
|
||||
```python
|
||||
# In bot.py main():
|
||||
from lib.tool_registry import ToolRegistry, ToolDef
|
||||
from telegram.tools import research_tool, source_tool, claim_tool, learning_tool
|
||||
|
||||
registry = ToolRegistry()
|
||||
registry.register(research_tool)
|
||||
registry.register(source_tool)
|
||||
registry.register(claim_tool)
|
||||
registry.register(learning_tool)
|
||||
```
|
||||
|
||||
Each tool is defined in `telegram/tools.py` (or split into `telegram/tools/` if the file grows):
|
||||
|
||||
```python
|
||||
# telegram/tools.py
|
||||
|
||||
research_tool = ToolDef(
|
||||
name="research",
|
||||
description="Search X for recent posts on a topic. Results sent back to chat.",
|
||||
tag_prefix="RESEARCH",
|
||||
arg_pattern=r"(.+)",
|
||||
prompt_example="RESEARCH: [search query]",
|
||||
handler=_handle_research,
|
||||
cost="cheap", # One twitterapi.io call
|
||||
requires_gate=False, # Fire immediately — user expects fast response
|
||||
cooldown_seconds=0,
|
||||
daily_limit=3, # Existing limit from bot.py
|
||||
)
|
||||
|
||||
source_tool = ToolDef(
|
||||
name="source",
|
||||
description="Archive source material contributed by a user.",
|
||||
tag_prefix="SOURCE",
|
||||
arg_pattern=r"(.+)",
|
||||
prompt_example="SOURCE: [description]",
|
||||
handler=_handle_source,
|
||||
cost="free", # File write only
|
||||
requires_gate=False,
|
||||
cooldown_seconds=0,
|
||||
daily_limit=0,
|
||||
)
|
||||
|
||||
claim_tool = ToolDef(
|
||||
name="claim",
|
||||
description="Draft a KB claim from a user's assertion.",
|
||||
tag_prefix="CLAIM",
|
||||
arg_pattern=r"(.+)",
|
||||
prompt_example="CLAIM: [specific assertion]",
|
||||
handler=_handle_claim,
|
||||
cost="free",
|
||||
requires_gate=False,
|
||||
cooldown_seconds=0,
|
||||
daily_limit=0,
|
||||
)
|
||||
|
||||
learning_tool = ToolDef(
|
||||
name="learning",
|
||||
description="Record a correction or new fact from conversation.",
|
||||
tag_prefix="LEARNING",
|
||||
arg_pattern=r"(factual|communication|structured_data)\s+(.+)",
|
||||
arg_groups=["category", "content"],
|
||||
prompt_example="LEARNING: [category] [what was learned]",
|
||||
handler=_handle_learning,
|
||||
cost="free",
|
||||
requires_gate=False,
|
||||
cooldown_seconds=0,
|
||||
daily_limit=0,
|
||||
)
|
||||
```
|
||||
|
||||
### Integration with Decomposed bot.py
|
||||
|
||||
After the 3-module decomposition (bot.py / retrieval.py / response.py), the tool registry slots in cleanly:
|
||||
|
||||
1. **response.py** generates the prompt using `registry.prompt_block()` instead of the hardcoded tag instructions at the end of `build_system_prompt()`.
|
||||
|
||||
2. **response.py** `parse_response()` becomes `parse_response(raw, registry)` — iterates registered tools to find tags via auto-generated regexes:
|
||||
```python
|
||||
for tool in registry.all_tools():
|
||||
pattern = rf'^{tool.tag_prefix}:\s+{tool.arg_pattern}$'
|
||||
matches = re.findall(pattern, raw, re.MULTILINE)
|
||||
```
|
||||
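Each tool's `tag_prefix` + `arg_pattern` defines the pattern. LEARNING's multi-group pattern (`(factual|communication|structured_data)\s+(.+)`) works naturally — `re.findall` returns tuples matched to `arg_groups`. Single-group patterns such as `(.+)` make `re.findall` return plain strings rather than tuples, so the parser must normalize both shapes before building tool calls.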
|
||||
|
||||
3. **bot.py** `handle_tagged()` replaces the hardcoded tag-action blocks (lines 1100-1126) with:
|
||||
```python
|
||||
for tool_call in parsed.tool_calls:
|
||||
result = await registry.execute(tool_call.name, tool_call.context)
|
||||
tool_calls_audit.append(result.audit)
|
||||
if result.message:
|
||||
await msg.reply_text(result.message)
|
||||
```
|
||||
|
||||
### Eval Gate Interface
|
||||
|
||||
This is the boundary between Epimetheus's eval pipeline and the tool registry.
|
||||
|
||||
```python
|
||||
# lib/eval_gate.py (owned by Epimetheus)
|
||||
|
||||
class EvalGate:
|
||||
"""Approves or blocks tool calls based on eval policy."""
|
||||
|
||||
async def check(self, tool: ToolDef, ctx: ToolContext) -> GateDecision:
|
||||
"""Returns GateDecision(approved=True/False, reason=str).
|
||||
|
||||
Called by ToolRegistry.execute() when tool.requires_gate is True.
|
||||
Receives full ToolDef so gate can check cost tier without registry lookup.
|
||||
Eval pipeline implements the policy — registry just calls the interface.
|
||||
"""
|
||||
```
|
||||
|
||||
Contract:
|
||||
- `ToolRegistry.execute()` calls `EvalGate.check()` before running any tool with `requires_gate=True`.
|
||||
- If `check()` returns `approved=False`, the tool is not executed and a failed `ToolResult` (`success=False`, `message=reason`, empty `side_effects`) is returned.
|
||||
- If `check()` raises or times out (>2s), the tool **executes anyway** with a warning logged. Non-fatal — eval gate failure should not block user-facing responses.
|
||||
- `EvalGate` is injected into `ToolRegistry` at construction time. If no gate is provided, all tools execute unconditionally.
|
||||
|
||||
```python
|
||||
registry = ToolRegistry(gate=EvalGate()) # With gating
|
||||
registry = ToolRegistry() # No gating (default)
|
||||
```
|
||||
|
||||
### Adding a New Tool
|
||||
|
||||
One tool definition + one registration call:
|
||||
|
||||
1. Define the tool in `telegram/tools.py`:
|
||||
```python
|
||||
new_tool = ToolDef(
|
||||
name="summarize",
|
||||
description="Generate a summary of the current conversation.",
|
||||
tag_prefix="SUMMARIZE",
|
||||
prompt_example="SUMMARIZE: [topic]",
|
||||
handler=_handle_summarize,
|
||||
cost="cheap",
|
||||
requires_gate=True, # Eval reviews before executing
|
||||
)
|
||||
```
|
||||
|
||||
2. Register in `main()`:
|
||||
```python
|
||||
registry.register(new_tool)
|
||||
```
|
||||
|
||||
The LLM prompt, tag parsing, and audit trail all update automatically — no other code changes needed.
|
||||
|
||||
### What This Does NOT Cover
|
||||
|
||||
- **Agent-to-agent tool calls.** This registry is for LLM response tags in the Telegram bot. If agents need to call tools on each other, that's a different system (Pentagon messaging).
|
||||
- **Multi-step tool chains.** Each tool fires independently. If RESEARCH results should feed into a CLAIM, that's handled by conversation context on the next turn, not by chaining tools.
|
||||
- **Tool discovery by the LLM.** The LLM sees all registered tools in the prompt. No dynamic tool selection or function-calling protocol — we use response tags, which are simpler and auditable.
|
||||
|
||||
### Migration Path
|
||||
|
||||
1. Write `lib/tool_registry.py` with `ToolRegistry`, `ToolDef`, `ToolContext`, `ToolResult`.
|
||||
2. Write `telegram/tools.py` with the four existing tools (handlers extracted from bot.py).
|
||||
3. Update `response.py`: `build_system_prompt` uses `registry.prompt_block()`, `parse_response` uses registry for tag patterns.
|
||||
4. Update `bot.py` `handle_tagged`: replace hardcoded tag blocks with `registry.execute()` loop.
|
||||
5. Wire `EvalGate` when Epimetheus's eval pipeline is ready to gate tool calls.
|
||||
|
||||
Steps 1-4 are mechanical extraction. Step 5 depends on Epimetheus defining eval policy for tool calls.
|
||||
|
||||
### Resolved Questions
|
||||
|
||||
1. **Tag regex generation:** Yes — `tag_prefix` + `arg_pattern` on `ToolDef` (structured fields). `parse_response` auto-generates regexes. `prompt_example` is the separate human-readable field for the LLM prompt.
|
||||
|
||||
2. **Tag display suppression:** Yes — `strip_from_display: bool = True` on `ToolDef`. Default True (current behavior). Future tools set False if output should be visible.
|
||||
|
||||
3. **Rate limiting scope:** Per-user-per-day only. No per-chat limits until real usage demands it. `cooldown_seconds` + `daily_limit` cover current requirements.
|
||||
841
fetch_coins.py
Normal file
841
fetch_coins.py
Normal file
|
|
@ -0,0 +1,841 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ownership Coin Portfolio Data Fetcher
|
||||
|
||||
Reads entity files for token addresses, fetches current and historical
|
||||
price data from DexScreener and CoinGecko, stores daily snapshots in
|
||||
pipeline.db coin_snapshots table.
|
||||
|
||||
Usage:
|
||||
python3 fetch_coins.py --daily # Today's snapshot (current prices + on-chain)
|
||||
python3 fetch_coins.py --backfill # Historical daily prices from CoinGecko
|
||||
python3 fetch_coins.py --backfill-days 90 # Last N days only
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import urllib.request
|
||||
import base58
|
||||
import yaml
|
||||
|
||||
# Process-wide logging: timestamped lines at INFO and above.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("fetch_coins")

# Filesystem locations. MAIN_WORKTREE and DB_PATH can be overridden through
# environment variables of the same name.
MAIN_WORKTREE = Path(os.environ.get("MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main"))
DB_PATH = Path(os.environ.get("DB_PATH", "/opt/teleo-eval/pipeline/pipeline.db"))
ENTITY_DIR = MAIN_WORKTREE / "entities" / "internet-finance"

# Market-data endpoints; `{mint}` / `{days}` are filled in with str.format().
DEXSCREENER_TOKEN_URL = "https://api.dexscreener.com/tokens/v1/solana/{mint}"
COINGECKO_HISTORY_URL = (
    "https://api.coingecko.com/api/v3/coins/solana/contract/{mint}"
    "/market_chart?vs_currency=usd&days={days}"
)
COINGECKO_RATE_LIMIT = 6.0  # seconds between requests (free tier — 10-15 req/min)

# On-chain lookups.
USDC_MINT = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v"
SOLANA_RPC = "https://api.mainnet-beta.solana.com"
|
||||
|
||||
|
||||
def _http_get_json(url, retries=3, timeout=15):
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": "teleo-portfolio/1.0",
|
||||
})
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read())
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 429 and attempt < retries:
|
||||
wait = 15 * (attempt + 1)
|
||||
logger.info("Rate limited, waiting %ds...", wait)
|
||||
time.sleep(wait)
|
||||
continue
|
||||
logger.warning("HTTP %d for %s", e.code, url[:80])
|
||||
return None
|
||||
except Exception as e:
|
||||
if attempt < retries:
|
||||
time.sleep(2 ** attempt)
|
||||
continue
|
||||
logger.warning("HTTP GET failed after %d attempts: %s — %s", retries + 1, url[:80], e)
|
||||
return None
|
||||
|
||||
|
||||
def _mapping_or_empty(value):
    """Return `value` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def load_ownership_coins():
    """Read entity files and return list of coin dicts with chain data.

    Scans every `*.md` file in ENTITY_DIR, parses the YAML front matter
    between the first two `---` markers, and keeps entries whose `subtype`
    is "ownership-coin" and whose `status` is not "liquidated".

    Files without front matter, with unparseable YAML, or whose front matter
    is not a mapping are skipped. The nested sections (`chain`, `raise`,
    `operations`, `liquidation`) are treated as empty when they are missing
    or not mappings, so one malformed entity file cannot abort the run.
    """
    coins = []
    for f in sorted(ENTITY_DIR.glob("*.md")):
        content = f.read_text(encoding="utf-8")
        # Fewer than three parts means there is no closed front-matter block.
        parts = content.split("---", 2)
        if len(parts) < 3:
            continue
        try:
            fm = yaml.safe_load(parts[1])
        except Exception as e:
            logger.warning("Skipping %s — invalid YAML front matter: %s", f.name, e)
            continue
        if not isinstance(fm, dict):
            continue
        if fm.get("subtype") != "ownership-coin":
            continue
        if fm.get("status") == "liquidated":
            continue

        chain = _mapping_or_empty(fm.get("chain"))
        raise_data = _mapping_or_empty(fm.get("raise"))
        ops = _mapping_or_empty(fm.get("operations"))
        liq = _mapping_or_empty(fm.get("liquidation"))

        coins.append({
            "name": fm.get("name", f.stem),
            "ticker": fm.get("ticker"),
            "status": fm.get("status", "unknown"),
            "token_mint": chain.get("token_mint"),
            "treasury_multisig": chain.get("treasury_multisig"),
            "lp_pools": chain.get("lp_pools") or [],
            "vesting_wallets": chain.get("vesting_wallets") or [],
            "investor_locked_tokens": chain.get("investor_locked_tokens") or 0,
            "meteora_seed_tokens": chain.get("meteora_seed_tokens") or 0,
            "initial_price": raise_data.get("initial_token_price_usd"),
            "amount_raised": raise_data.get("amount_raised_usd"),
            "monthly_allowance": ops.get("monthly_allowance_usd"),
            "liquidation_date": liq.get("date"),
            "liquidation_return": liq.get("return_per_dollar"),
            "file": f.name,
        })

    return coins
|
||||
|
||||
|
||||
def ensure_schema(conn):
    """Create the coin_snapshots table and its indexes if they don't exist.

    Also migrates older databases in place by adding any of the later
    columns that are missing. Existing columns are detected with
    PRAGMA table_info, so a genuine ALTER TABLE failure (locked or
    read-only database) surfaces here instead of being swallowed and
    failing later on insert. Commits before returning.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS coin_snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            snapshot_date TEXT NOT NULL,
            name TEXT NOT NULL,
            ticker TEXT,
            token_mint TEXT,
            status TEXT,
            price_usd REAL,
            market_cap_usd REAL,
            fdv_usd REAL,
            circulating_supply REAL,
            total_supply REAL,
            volume_24h_usd REAL,
            liquidity_usd REAL,
            treasury_multisig_usd REAL,
            lp_usdc_total REAL,
            lp_pools_detail TEXT,
            equity_value_usd REAL,
            initial_price_usd REAL,
            amount_raised_usd REAL,
            monthly_allowance_usd REAL,
            effective_liq_price REAL,
            delta_pct REAL,
            months_runway REAL,
            protocol_owned_tokens REAL,
            adjusted_circulating_supply REAL,
            data_source TEXT,
            fetched_at TEXT NOT NULL,
            UNIQUE(snapshot_date, name)
        )
    """)
    # Migration: the first two columns are part of CREATE TABLE above but may
    # be missing in older DBs; the last two exist only through this ALTER path.
    # Column 1 of each PRAGMA table_info row is the column name.
    existing = {info[1] for info in conn.execute("PRAGMA table_info(coin_snapshots)")}
    for col in ("protocol_owned_tokens", "adjusted_circulating_supply", "treasury_protocol_tokens", "vesting_tokens"):
        if col not in existing:
            conn.execute(f"ALTER TABLE coin_snapshots ADD COLUMN {col} REAL")
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_coin_snapshots_date
        ON coin_snapshots(snapshot_date)
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_coin_snapshots_name
        ON coin_snapshots(name)
    """)
    conn.commit()
|
||||
|
||||
|
||||
def fetch_dexscreener(mint):
    """Get current price, mcap, fdv, volume, liquidity from DexScreener.

    Returns a dict for the highest-liquidity pair of `mint`, or None when
    the request fails or no pairs come back. `circulating_supply` and
    `total_supply` are always None because this endpoint does not provide
    them.
    """
    url = DEXSCREENER_TOKEN_URL.format(mint=mint)
    data = _http_get_json(url)
    if not data:
        return None

    # The response is handled both as a bare list of pairs and as {"pairs": [...]}.
    pairs = data if isinstance(data, list) else data.get("pairs", [])
    if not pairs:
        return None

    def _liquidity_usd(pair):
        # `liquidity` or its `usd` field may be missing or null; rank those
        # as 0 so max() never compares None with a number.
        return (pair.get("liquidity") or {}).get("usd") or 0

    # Use highest-liquidity pair
    best = max(pairs, key=_liquidity_usd)
    liq = best.get("liquidity") or {}

    return {
        "price_usd": float(best["priceUsd"]) if best.get("priceUsd") else None,
        "market_cap_usd": best.get("marketCap"),
        "fdv_usd": best.get("fdv"),
        "volume_24h_usd": (best.get("volume") or {}).get("h24"),
        "liquidity_usd": liq.get("usd"),
        "circulating_supply": None,  # DexScreener doesn't provide this directly
        "total_supply": None,
    }
|
||||
|
||||
|
||||
def fetch_coingecko_history(mint, days=365):
    """Fetch per-day price, market cap and volume history from CoinGecko.

    Returns a list of dicts sorted by date, each with `date` (YYYY-MM-DD,
    UTC), `price_usd`, `market_cap_usd` and `volume_24h_usd`. The list is
    empty when the request fails or the reply has no "prices" key. Days that
    have a price but no market cap or volume get None for the missing field.
    """
    payload = _http_get_json(COINGECKO_HISTORY_URL.format(mint=mint, days=days))
    if not payload or "prices" not in payload:
        return []

    def _bucket_by_day(series):
        # Collapse [timestamp_ms, value] pairs to one value per UTC day;
        # the last sample seen for a day wins.
        per_day = {}
        for ts_ms, value in series:
            moment = datetime.datetime.fromtimestamp(ts_ms / 1000, tz=datetime.timezone.utc)
            per_day[moment.strftime("%Y-%m-%d")] = value
        return per_day

    prices = _bucket_by_day(payload["prices"])
    caps = _bucket_by_day(payload.get("market_caps", []))
    vols = _bucket_by_day(payload.get("total_volumes", []))

    return [
        {
            "date": day,
            "price_usd": prices[day],
            "market_cap_usd": caps.get(day),
            "volume_24h_usd": vols.get(day),
        }
        for day in sorted(prices)
    ]
|
||||
|
||||
|
||||
def fetch_solana_token_supply(mint):
    """Get token supply from Solana RPC.

    Returns {"total_supply": <uiAmount>} on success. Returns {} when the
    request raises or the RPC answers with a JSON-RPC error object; both
    cases are logged as warnings so a failed lookup is not mistaken for a
    token with no supply data.
    """
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getTokenSupply",
        "params": [mint],
    }
    req = urllib.request.Request(
        SOLANA_RPC,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        if "error" in data:
            # An error reply has no "result"; report it instead of silently
            # returning a None supply.
            logger.warning("Solana RPC getTokenSupply error for %s: %s", mint[:12], data["error"])
            return {}
        val = data.get("result", {}).get("value", {})
        amount = val.get("uiAmount")
        return {"total_supply": amount}
    except Exception as e:
        logger.warning("Solana RPC getTokenSupply failed for %s: %s", mint[:12], e)
        return {}
|
||||
|
||||
|
||||
def fetch_solana_usdc_balance(wallet_address):
    """Get USDC balance for a wallet from Solana RPC.

    Sums the balance of every USDC token account owned by `wallet_address`.
    Returns None when no address is given, when the request raises, or when
    the RPC answers with a JSON-RPC error object — an error reply must not be
    reported as a zero balance.
    """
    if not wallet_address:
        return None
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getTokenAccountsByOwner",
        "params": [
            wallet_address,
            {"mint": USDC_MINT},
            {"encoding": "jsonParsed"},
        ],
    }
    req = urllib.request.Request(
        SOLANA_RPC,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())
        if "error" in data:
            logger.warning("RPC error for %s: %s", wallet_address[:12], data["error"])
            return None
        accounts = data.get("result", {}).get("value", [])
        total = 0.0
        for acct in accounts:
            info = acct.get("account", {}).get("data", {}).get("parsed", {}).get("info", {})
            token_amount = info.get("tokenAmount", {})
            # `uiAmount` can be null; prefer the string form and fall back to 0
            # so one odd account does not discard the whole balance.
            total += float(token_amount.get("uiAmountString") or token_amount.get("uiAmount") or 0)
        return total
    except Exception as e:
        logger.warning("Solana RPC USDC balance failed for %s: %s", wallet_address[:12], e)
        return None
|
||||
|
||||
|
||||
def fetch_solana_token_balance(wallet_address, token_mint):
    """Get balance of a specific SPL token for a wallet from Solana RPC.

    Sums the balance of every token account for `token_mint` owned by
    `wallet_address`. Makes up to three attempts; rate limiting (HTTP 429 or
    a JSON-RPC error with code 429) waits 10s, then 20s, before retrying.
    Returns None when either argument is empty or the lookup fails.
    """
    if not wallet_address or not token_mint:
        return None
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getTokenAccountsByOwner",
        "params": [
            wallet_address,
            {"mint": token_mint},
            {"encoding": "jsonParsed"},
        ],
    }
    for attempt in range(3):
        req = urllib.request.Request(
            SOLANA_RPC,
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read())
            if "error" in data:
                code = data["error"].get("code", 0)
                if code == 429 and attempt < 2:
                    wait = 10 * (attempt + 1)
                    logger.info("RPC rate limited for %s, retrying in %ds...", wallet_address[:12], wait)
                    time.sleep(wait)
                    continue
                logger.warning("RPC error for %s: %s", wallet_address[:12], data["error"])
                return None
            accounts = data.get("result", {}).get("value", [])
            total = 0.0
            for acct in accounts:
                info = acct.get("account", {}).get("data", {}).get("parsed", {}).get("info", {})
                token_amount = info.get("tokenAmount", {})
                # `uiAmount` can be null; prefer the string form and fall back
                # to 0 so one odd account does not discard the whole balance.
                total += float(token_amount.get("uiAmountString") or token_amount.get("uiAmount") or 0)
            return total
        except urllib.error.HTTPError as e:
            if e.code == 429 and attempt < 2:
                wait = 10 * (attempt + 1)
                logger.info("RPC 429 for %s, retrying in %ds...", wallet_address[:12], wait)
                time.sleep(wait)
                continue
            logger.warning("Solana RPC token balance failed for %s (mint %s): %s",
                           wallet_address[:12], token_mint[:12], e)
            return None
        except Exception as e:
            logger.warning("Solana RPC token balance failed for %s (mint %s): %s",
                           wallet_address[:12], token_mint[:12], e)
            return None
    return None
|
||||
|
||||
|
||||
|
||||
# Meteora program IDs
METEORA_CPAMM = "cpamdpZCGKUy5JxQXB4dcpGPiikHawvSWAd6mEn1sGG"
METEORA_DLMM = "LBUZKhRxPF3XUpBCjp4YzTKgLccjZhTSDM9YuVaPwxo"
# Pool account layout: two consecutive 32-byte addresses.
# CPAMM: vault_a at byte 232, vault_b at byte 264
# DLMM: reserve_x at byte 152, reserve_y at byte 184


def _resolve_meteora_vaults(pool_address):
    """Read a Meteora pool account and extract its two token vault addresses.

    The account's owning program selects the byte layout (CPAMM or DLMM).
    Makes up to three attempts, waiting 10s then 20s when rate limited
    (HTTP 429 or JSON-RPC error code 429).

    Returns (vault_a_addr, vault_b_addr, program_type), where program_type is
    "cpamm" or "dlmm", or (None, None, None) on any failure, unknown owner,
    or account data too short for the layout.
    """
    import base64

    unresolved = (None, None, None)
    # owner program -> (offset of the first address, program label)
    layouts = {
        METEORA_CPAMM: (232, "cpamm"),
        METEORA_DLMM: (152, "dlmm"),
    }
    rpc_call = {
        "jsonrpc": "2.0", "id": 1,
        "method": "getAccountInfo",
        "params": [pool_address, {"encoding": "base64"}],
    }
    for attempt in range(3):
        may_retry = attempt < 2
        try:
            request = urllib.request.Request(
                SOLANA_RPC,
                data=json.dumps(rpc_call).encode(),
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(request, timeout=15) as response:
                reply = json.loads(response.read())
            if "error" in reply:
                if reply["error"].get("code", 0) == 429 and may_retry:
                    time.sleep(10 * (attempt + 1))
                    continue
                return unresolved
            account = reply.get("result", {}).get("value")
            if not account:
                return unresolved
            program = account.get("owner", "")
            blob = base64.b64decode(account["data"][0])

            layout = layouts.get(program)
            if layout is None:
                return unresolved
            start, label = layout
            if len(blob) < start + 64:
                return unresolved
            first = base58.b58encode(blob[start:start + 32]).decode()
            second = base58.b58encode(blob[start + 32:start + 64]).decode()
            return first, second, label
        except urllib.error.HTTPError as http_err:
            if http_err.code == 429 and may_retry:
                time.sleep(10 * (attempt + 1))
                continue
            return unresolved
        except Exception:
            return unresolved
    return unresolved
|
||||
|
||||
|
||||
def _fetch_vault_balance(vault_address):
    """Look up the token mint and balance held by a vault/reserve account.

    Makes up to three attempts, waiting 10s then 20s when rate limited
    (HTTP 429 or JSON-RPC error code 429).

    Returns (mint, amount), or (None, 0.0) when the lookup fails, the account
    does not exist, or its data did not come back in parsed form.
    """
    nothing = (None, 0.0)
    rpc_call = {
        "jsonrpc": "2.0", "id": 1,
        "method": "getAccountInfo",
        "params": [vault_address, {"encoding": "jsonParsed"}],
    }
    for attempt in range(3):
        may_retry = attempt < 2
        try:
            request = urllib.request.Request(
                SOLANA_RPC,
                data=json.dumps(rpc_call).encode(),
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(request, timeout=15) as response:
                reply = json.loads(response.read())
            if "error" in reply:
                if reply["error"].get("code", 0) == 429 and may_retry:
                    time.sleep(10 * (attempt + 1))
                    continue
                return nothing
            account = reply.get("result", {}).get("value")
            # Only a dict `data` field is in parsed form and usable here.
            if not account or not isinstance(account.get("data"), dict):
                return nothing
            parsed = account["data"]["parsed"]["info"]
            return parsed["mint"], float(parsed["tokenAmount"]["uiAmountString"])
        except urllib.error.HTTPError as http_err:
            if http_err.code == 429 and may_retry:
                time.sleep(10 * (attempt + 1))
                continue
            return nothing
        except Exception:
            return nothing
    return nothing
|
||||
|
||||
|
||||
def _sum_owner_token_accounts(owner, dex, token_mint):
    """Sum USDC and `token_mint` balances over SPL token accounts owned by `owner`.

    Makes up to three attempts, waiting 10s then 20s when rate limited.
    Returns (usdc, tokens); whatever was accumulated before a failure is
    returned as-is, and failures are logged.
    """
    payload = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "getTokenAccountsByOwner",
        "params": [
            owner,
            {"programId": "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"},
            {"encoding": "jsonParsed"},
        ],
    }
    usdc = 0.0
    tokens = 0.0
    for attempt in range(3):
        # Single source of truth for the back-off so the log line reports the
        # delay that is actually slept.
        wait = 10 * (attempt + 1)
        try:
            req = urllib.request.Request(
                SOLANA_RPC,
                data=json.dumps(payload).encode(),
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(req, timeout=15) as resp:
                data = json.loads(resp.read())
            if "error" in data:
                code = data["error"].get("code", 0)
                if code == 429 and attempt < 2:
                    logger.info("RPC rate limited for %s, retrying in %ds...", owner[:12], wait)
                    time.sleep(wait)
                    continue
                logger.warning("RPC error for LP %s: %s", owner[:12], data["error"])
                break
            for acct in data.get("result", {}).get("value", []):
                info = acct["account"]["data"]["parsed"]["info"]
                mint = info["mint"]
                amt = float(info["tokenAmount"]["uiAmountString"])
                if amt == 0:
                    continue
                if mint == USDC_MINT:
                    usdc += amt
                elif token_mint and mint == token_mint:
                    tokens += amt
            break
        except urllib.error.HTTPError as e:
            if e.code == 429 and attempt < 2:
                logger.info("RPC 429 for %s, retrying in %ds...", owner[:12], wait)
                time.sleep(wait)
                continue
            logger.warning("LP wallet query failed for %s (%s): %s", dex, owner[:12], e)
            break
        except Exception as e:
            logger.warning("LP wallet query failed for %s (%s): %s", dex, owner[:12], e)
            break
    return usdc, tokens


def fetch_lp_wallet_balances(lp_pools, token_mint):
    """Query LP wallets for USDC balance and protocol-owned tokens.

    `lp_pools` is a list of dicts with an `address` and optional `dex` key;
    entries without an address are skipped. Meteora pools are read through
    their resolved vault accounts first. Any pool that ends up with no
    balance (including non-Meteora pools) is queried through
    getTokenAccountsByOwner on the pool address. Sleeps between RPC calls to
    stay under the RPC rate limit.

    Returns (lp_usdc_total, protocol_owned_tokens, lp_details_list).
    """
    if not lp_pools:
        return 0.0, 0.0, []

    total_usdc = 0.0
    total_protocol_tokens = 0.0
    details = []

    for pool in lp_pools:
        address = pool.get("address")
        dex = pool.get("dex", "unknown")
        if not address:
            continue

        pool_usdc = 0.0
        pool_tokens = 0.0

        # Try Meteora vault resolution first (CPAMM + DLMM)
        if dex == "meteora":
            vault_a, vault_b, prog_type = _resolve_meteora_vaults(address)
            if vault_a and vault_b:
                logger.info("Meteora %s pool %s: vaults %s, %s", prog_type, address[:12], vault_a[:12], vault_b[:12])
                time.sleep(2)
                for vault_addr in [vault_a, vault_b]:
                    mint, amt = _fetch_vault_balance(vault_addr)
                    if mint and amt > 0:
                        if mint == USDC_MINT:
                            pool_usdc += amt
                        elif token_mint and mint == token_mint:
                            pool_tokens += amt
                    time.sleep(2)
            else:
                logger.warning("Meteora vault resolution failed for %s, falling back to getTokenAccountsByOwner", address[:12])

        # Fallback: getTokenAccountsByOwner (works for futarchy-amm and non-Meteora pools)
        if pool_usdc == 0 and pool_tokens == 0:
            pool_usdc, pool_tokens = _sum_owner_token_accounts(address, dex, token_mint)

        total_usdc += pool_usdc
        total_protocol_tokens += pool_tokens
        details.append({
            "dex": dex,
            "address": address,
            "usdc": round(pool_usdc, 2),
            "protocol_tokens": round(pool_tokens, 2),
        })
        time.sleep(5)

    return total_usdc, total_protocol_tokens, details
|
||||
|
||||
|
||||
def compute_derived(row, coin):
    """Fill in the derived metrics of a snapshot row and return that row.

    Mutates `row` in place:
    - adjusted_circulating_supply: total supply minus protocol-owned tokens,
      when not already set and total supply is positive.
    - effective_liq_price: (treasury + LP USDC) / adjusted supply.
    - market_cap_usd: replaced by price * adjusted supply when both are known.
    - delta_pct: effective liquidation price relative to price, in percent.
    - equity_value_usd: market cap minus cash, or None without a market cap.
    - months_runway: treasury / monthly allowance (`coin["monthly_allowance"]`).
    """
    price = row.get("price_usd")
    has_price = bool(price) and price > 0

    treasury = row.get("treasury_multisig_usd") or 0
    cash_total = treasury + (row.get("lp_usdc_total") or 0)
    mcap = row.get("market_cap_usd") or 0
    protocol_tokens = row.get("protocol_owned_tokens") or 0
    total_supply = row.get("total_supply")

    adj_circ = row.get("adjusted_circulating_supply")
    if not adj_circ and total_supply and total_supply > 0:
        adj_circ = total_supply - protocol_tokens
        row["adjusted_circulating_supply"] = adj_circ

    if adj_circ and adj_circ > 0:
        row["effective_liq_price"] = cash_total / adj_circ
        if has_price:
            reported_mcap = row.get("market_cap_usd")
            mcap = price * adj_circ
            row["market_cap_usd"] = mcap
            if reported_mcap and abs(mcap - reported_mcap) > 1:
                logger.debug("%s: adjusted mcap $%.0f (was $%.0f, protocol_owned=%s)",
                             row.get("name", "?"), mcap, reported_mcap, protocol_tokens)

    liq_price = row.get("effective_liq_price")
    if has_price and liq_price:
        row["delta_pct"] = ((liq_price / price) - 1) * 100

    if mcap:
        row["equity_value_usd"] = mcap - cash_total
    else:
        row["equity_value_usd"] = None

    monthly = coin.get("monthly_allowance")
    if monthly and monthly > 0 and treasury:
        row["months_runway"] = treasury / monthly

    return row
|
||||
|
||||
|
||||
# Columns written by upsert_snapshot, in statement order.
_SNAPSHOT_COLUMNS = (
    "snapshot_date", "name", "ticker", "token_mint", "status",
    "price_usd", "market_cap_usd", "fdv_usd",
    "circulating_supply", "total_supply",
    "volume_24h_usd", "liquidity_usd",
    "treasury_multisig_usd", "lp_usdc_total", "lp_pools_detail",
    "equity_value_usd", "initial_price_usd", "amount_raised_usd",
    "monthly_allowance_usd", "effective_liq_price", "delta_pct",
    "months_runway", "protocol_owned_tokens", "adjusted_circulating_supply",
    "treasury_protocol_tokens", "vesting_tokens",
    "data_source", "fetched_at",
)

# Built once from the constant column list above (never from caller input).
_UPSERT_SNAPSHOT_SQL = (
    "INSERT OR REPLACE INTO coin_snapshots ("
    + ", ".join(_SNAPSHOT_COLUMNS)
    + ") VALUES ("
    + ", ".join(f":{col}" for col in _SNAPSHOT_COLUMNS)
    + ")"
)


def upsert_snapshot(conn, row):
    """Insert or replace a daily snapshot.

    `row` is a dict keyed by coin_snapshots column name. Keys missing from
    `row` are stored as NULL instead of raising a binding error, and keys
    that are not snapshot columns are ignored. The table's own constraints
    still apply, so a row without `snapshot_date`, `name` or `fetched_at`
    is rejected by SQLite. Does not commit.
    """
    params = {col: row.get(col) for col in _SNAPSHOT_COLUMNS}
    conn.execute(_UPSERT_SNAPSHOT_SQL, params)
|
||||
|
||||
|
||||
def cmd_daily(coins, conn):
    """Fetch current data for all coins and store today's snapshot.

    For every coin that has a token mint this collects price data (from
    DexScreener, falling back to the most recent stored price), token supply,
    treasury / vesting / LP balances, derives an adjusted circulating supply
    and market cap, and upserts one ``coin_snapshots`` row dated today.

    Args:
        coins: Iterable of coin config dicts. Keys read here: ``name``,
            ``ticker``, ``token_mint``, ``status`` and the optional
            ``treasury_multisig``, ``vesting_wallets``, ``lp_pools``,
            ``investor_locked_tokens``, ``meteora_seed_tokens``,
            ``initial_price``, ``amount_raised``, ``monthly_allowance``.
        conn: Open SQLite connection holding the ``coin_snapshots`` table.
    """
    # NOTE(review): ``today`` is the machine-local date while ``now`` is UTC;
    # confirm which day boundary snapshot_date is meant to follow.
    today = datetime.date.today().isoformat()
    now = datetime.datetime.now(datetime.timezone.utc).isoformat()

    for coin in coins:
        mint = coin["token_mint"]
        if not mint:
            logger.info("Skipping %s — no token mint", coin["name"])
            continue

        logger.info("Fetching %s (%s)...", coin["name"], coin["ticker"])

        # Current price from DexScreener
        dex = fetch_dexscreener(mint)
        if not dex:
            logger.warning("DexScreener returned nothing for %s — trying last known price", coin["name"])
            last_row = conn.execute(
                "SELECT price_usd FROM coin_snapshots WHERE name=? AND price_usd IS NOT NULL ORDER BY snapshot_date DESC LIMIT 1",
                (coin["name"],)
            ).fetchone()
            if last_row and last_row[0]:
                # Carry only the price forward; every other market field is unknown.
                dex = {
                    "price_usd": last_row[0],
                    "market_cap_usd": None,
                    "fdv_usd": None,
                    "volume_24h_usd": None,
                    "liquidity_usd": None,
                    "circulating_supply": None,
                    "total_supply": None,
                }
                logger.info(" Using last known price: $%.4f", last_row[0])
            else:
                logger.warning(" No historical price either — skipping %s", coin["name"])
                continue

        # Token supply from Solana RPC. Guard against a falsy result so one
        # failed lookup does not abort the whole run on ``.get`` below.
        supply = fetch_solana_token_supply(mint) or {}
        time.sleep(4)

        # Treasury USDC balance + protocol token balance
        treasury_usd = None
        treasury_tokens = 0.0
        if coin.get("treasury_multisig"):
            treasury_usd = fetch_solana_usdc_balance(coin["treasury_multisig"])
            time.sleep(2)
            treas_tok = fetch_solana_token_balance(coin["treasury_multisig"], mint)
            if treas_tok and treas_tok > 0:
                treasury_tokens = treas_tok
                logger.info(" %s treasury holds %.0f protocol tokens", coin["name"], treasury_tokens)
            time.sleep(2)

        time.sleep(4)

        # Vesting wallet scanning — tokens locked in vesting contracts
        vesting_tokens = 0.0
        if coin.get("vesting_wallets"):
            for vw in coin["vesting_wallets"]:
                # Entries may be plain address strings or {"address", "label"} dicts.
                vw_addr = vw.get("address") if isinstance(vw, dict) else vw
                if not vw_addr:
                    continue
                vt = fetch_solana_token_balance(vw_addr, mint)
                if vt and vt > 0:
                    vesting_tokens += vt
                    label = vw.get("label", vw_addr[:12]) if isinstance(vw, dict) else vw_addr[:12]
                    logger.info(" %s vesting wallet (%s) holds %.0f tokens", coin["name"], label, vt)
                time.sleep(2)

        # LP pool balances — query each wallet for USDC + protocol-owned tokens
        lp_total = 0.0
        protocol_tokens = 0.0
        lp_detail = None
        if coin.get("lp_pools"):
            lp_total, protocol_tokens, lp_details_list = fetch_lp_wallet_balances(
                coin["lp_pools"], mint
            )
            lp_detail = json.dumps(lp_details_list) if lp_details_list else None

        total_supply = supply.get("total_supply")

        # Adjusted circulating supply: total minus every protocol-controlled
        # bucket (LP, treasury, vesting, investor-locked, Meteora seed).
        investor_locked = float(coin.get("investor_locked_tokens") or 0)
        meteora_seed = float(coin.get("meteora_seed_tokens") or 0)
        all_protocol_tokens = protocol_tokens + treasury_tokens + vesting_tokens + investor_locked + meteora_seed
        if investor_locked > 0:
            logger.info(" %s investor locked tokens: %.0f", coin["name"], investor_locked)
        if meteora_seed > 0:
            logger.info(" %s meteora seed tokens: %.0f", coin["name"], meteora_seed)
        adj_circ = None
        if total_supply and total_supply > 0:
            adj_circ = total_supply - all_protocol_tokens

        # Market cap: whenever an adjusted supply and a price are available the
        # adjusted figure replaces whatever DexScreener reported; otherwise
        # fall back to total_supply * price only if no mcap was reported.
        if adj_circ and dex.get("price_usd"):
            dex["market_cap_usd"] = adj_circ * dex["price_usd"]
        elif total_supply and dex.get("price_usd") and not dex.get("market_cap_usd"):
            dex["market_cap_usd"] = total_supply * dex["price_usd"]

        row = {
            "snapshot_date": today,
            "name": coin["name"],
            "ticker": coin["ticker"],
            "token_mint": mint,
            "status": coin["status"],
            "price_usd": dex.get("price_usd"),
            "market_cap_usd": dex.get("market_cap_usd"),
            "fdv_usd": dex.get("fdv_usd"),
            "circulating_supply": dex.get("circulating_supply"),
            "total_supply": total_supply,
            "volume_24h_usd": dex.get("volume_24h_usd"),
            "liquidity_usd": dex.get("liquidity_usd"),
            "treasury_multisig_usd": treasury_usd,
            "lp_usdc_total": lp_total if lp_total else None,
            "lp_pools_detail": lp_detail,
            "equity_value_usd": None,
            "initial_price_usd": coin.get("initial_price"),
            "amount_raised_usd": coin.get("amount_raised"),
            "monthly_allowance_usd": coin.get("monthly_allowance"),
            "effective_liq_price": None,
            "delta_pct": None,
            "months_runway": None,
            "protocol_owned_tokens": all_protocol_tokens if all_protocol_tokens else None,
            "treasury_protocol_tokens": treasury_tokens if treasury_tokens else None,
            "vesting_tokens": vesting_tokens if vesting_tokens else None,
            "adjusted_circulating_supply": adj_circ,
            "data_source": "dexscreener+solana_rpc",
            "fetched_at": now,
        }

        row = compute_derived(row, coin)
        upsert_snapshot(conn, row)
        # Commit per coin (as cmd_backfill does) so a failure on a later coin
        # does not discard the snapshots already gathered in this run.
        conn.commit()
        lp_msg = f" lp_usdc=${row.get('lp_usdc_total') or 0:,.0f} lp_tokens={protocol_tokens:,.0f} treas_tokens={treasury_tokens:,.0f}" if row.get("lp_usdc_total") or treasury_tokens else ""
        logger.info(" %s: $%.4f mcap=$%s adj_circ=%s%s",
                    coin["name"], row["price_usd"] or 0,
                    f'{row["market_cap_usd"]:,.0f}' if row["market_cap_usd"] else "N/A",
                    f'{row["adjusted_circulating_supply"]:,.0f}' if row.get("adjusted_circulating_supply") else "N/A",
                    lp_msg)
        time.sleep(1)

    conn.commit()
    logger.info("Daily snapshot complete for %s", today)
|
||||
|
||||
|
||||
def cmd_backfill(coins, conn, days=365):
    """Backfill historical daily prices from CoinGecko.

    For each coin with a token mint, fetches up to ``days`` of history and
    upserts one price-only ``coin_snapshots`` row per returned data point.
    Dates that already hold a snapshot from another data source are left
    untouched, because the upsert is ``INSERT OR REPLACE`` and a price-only
    row would wipe the treasury / LP / supply fields gathered by the daily run.

    Args:
        coins: Iterable of coin config dicts (``name``, ``ticker``,
            ``token_mint``, ``status`` and optional ``initial_price``,
            ``amount_raised``, ``monthly_allowance``).
        conn: Open SQLite connection holding the ``coin_snapshots`` table.
        days: Number of days of history to request per coin.
    """
    now = datetime.datetime.now(datetime.timezone.utc).isoformat()

    for coin in coins:
        mint = coin["token_mint"]
        if not mint:
            logger.info("Skipping %s — no token mint", coin["name"])
            continue

        logger.info("Backfilling %s (%s) — %d days...", coin["name"], coin["ticker"], days)
        history = fetch_coingecko_history(mint, days=days)

        if not history:
            logger.warning("No CoinGecko history for %s", coin["name"])
            time.sleep(COINGECKO_RATE_LIMIT)
            continue

        # Dates already covered by a richer (non-CoinGecko) snapshot.
        # ``IS NOT`` also protects rows whose data_source is NULL.
        # NOTE(review): presumes rows are unique per (snapshot_date, name) —
        # verify against ensure_schema.
        protected_dates = {
            r[0]
            for r in conn.execute(
                "SELECT snapshot_date FROM coin_snapshots "
                "WHERE name = ? AND data_source IS NOT 'coingecko_history'",
                (coin["name"],),
            )
        }

        inserted = 0
        skipped = 0
        for point in history:
            if point["date"] in protected_dates:
                skipped += 1
                continue
            row = {
                "snapshot_date": point["date"],
                "name": coin["name"],
                "ticker": coin["ticker"],
                "token_mint": mint,
                "status": coin["status"],
                "price_usd": point["price_usd"],
                "market_cap_usd": point.get("market_cap_usd"),
                "fdv_usd": None,
                "circulating_supply": None,
                "total_supply": None,
                "volume_24h_usd": point.get("volume_24h_usd"),
                "liquidity_usd": None,
                "treasury_multisig_usd": None,
                "lp_usdc_total": None,
                "lp_pools_detail": None,
                "equity_value_usd": None,
                "initial_price_usd": coin.get("initial_price"),
                "amount_raised_usd": coin.get("amount_raised"),
                "monthly_allowance_usd": coin.get("monthly_allowance"),
                "effective_liq_price": None,
                "delta_pct": None,
                "months_runway": None,
                "protocol_owned_tokens": None,
                "adjusted_circulating_supply": None,
                "treasury_protocol_tokens": None,
                "vesting_tokens": None,
                "data_source": "coingecko_history",
                "fetched_at": now,
            }
            upsert_snapshot(conn, row)
            inserted += 1

        conn.commit()
        logger.info(" %s: %d daily snapshots inserted", coin["name"], inserted)
        if skipped:
            logger.info(" %s: %d dates skipped (richer snapshot already stored)", coin["name"], skipped)
        time.sleep(COINGECKO_RATE_LIMIT)

    logger.info("Backfill complete")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: run the backfill and/or the daily snapshot.

    Requires at least one of ``--daily`` / ``--backfill``. When both are
    given, the backfill runs first. The database connection is closed on every
    exit path, including failures while configuring it or creating the schema.
    """
    parser = argparse.ArgumentParser(description="Ownership coin portfolio data fetcher")
    parser.add_argument("--daily", action="store_true", help="Fetch today's snapshot")
    parser.add_argument("--backfill", action="store_true", help="Backfill historical prices")
    parser.add_argument("--backfill-days", type=int, default=365, help="Days to backfill (default: 365)")
    args = parser.parse_args()

    if not args.daily and not args.backfill:
        parser.error("Specify --daily or --backfill")

    coins = load_ownership_coins()
    logger.info("Loaded %d ownership coins (%d with token mints)",
                len(coins), sum(1 for c in coins if c["token_mint"]))

    conn = sqlite3.connect(str(DB_PATH), timeout=30)
    try:
        # Inside the try so a PRAGMA or schema failure cannot leak the handle.
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=30000")
        ensure_schema(conn)

        if args.backfill:
            cmd_backfill(coins, conn, days=args.backfill_days)
        if args.daily:
            cmd_daily(coins, conn)
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|
||||
52
hermes-agent/GMAIL-SETUP.md
Normal file
52
hermes-agent/GMAIL-SETUP.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Gmail Setup for Hermes Agent
|
||||
|
||||
## Step 1: Create Google Cloud OAuth Credentials (~5 min)
|
||||
|
||||
1. Go to [console.cloud.google.com](https://console.cloud.google.com)
|
||||
2. Create a new project (or use existing): "Hermes Assistant"
|
||||
3. Enable these APIs:
|
||||
- Gmail API
|
||||
- Google Calendar API
|
||||
- Google Drive API (optional)
|
||||
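4. Configure the OAuth consent screen first (**APIs & Services → OAuth consent screen**; while the app is in "Testing", add your Google account as a test user), then go to **APIs & Services → Credentials → Create Credentials → OAuth 2.0 Client ID**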
|
||||
5. Application type: **Desktop app**
|
||||
6. Name: "Hermes Agent"
|
||||
7. Download the JSON file → save as `~/.hermes/google-credentials.json`
|
||||
|
||||
## Step 2: Configure Hermes
|
||||
|
||||
Add to `~/.hermes/.env`:
|
||||
|
||||
```
|
||||
GOOGLE_CLIENT_ID=your-client-id.apps.googleusercontent.com
|
||||
GOOGLE_CLIENT_SECRET=your-client-secret
|
||||
```
|
||||
|
||||
Or place the downloaded JSON at `~/.hermes/google-credentials.json`.
|
||||
|
||||
## Step 3: Authorize
|
||||
|
||||
```bash
|
||||
hermes setup google-workspace
|
||||
```
|
||||
|
||||
This opens a browser auth flow (or gives you a URL to paste). Sign in with
|
||||
m3taversal@gmail.com and grant permissions. Token is saved locally.
|
||||
|
||||
Since this is a VPS (no browser), you'll get a URL — open it on your laptop,
|
||||
authorize, paste the code back into the terminal.
|
||||
|
||||
## Step 4: Test
|
||||
|
||||
```bash
|
||||
hermes "Show me my last 5 emails"
|
||||
hermes "What's on my calendar today?"
|
||||
hermes "Draft a reply to the last email from [name]"
|
||||
```
|
||||
|
||||
## Security Notes
|
||||
|
||||
- OAuth tokens stored locally in `~/.hermes/` (chmod 600)
|
||||
- Hermes only accesses what you authorized — revoke anytime at
|
||||
[myaccount.google.com/permissions](https://myaccount.google.com/permissions)
|
||||
- The VPS is SSH-only access, no public web ports exposed to Hermes
|
||||
113
hermes-agent/install-hermes.sh
Executable file
113
hermes-agent/install-hermes.sh
Executable file
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/env bash
# Install Hermes Agent on Teleo VPS (CAX31, ARM64, Ubuntu)
# Run as: teleo user
# Prereqs: Python 3.11+, Node.js 22+, git
#
# Safe to re-run: other entries in an existing .env are preserved, and an
# existing config.yaml / SOUL.md is copied to *.bak before being rewritten.
set -euo pipefail

HERMES_HOME="$HOME/.hermes"
OPENROUTER_KEY_FILE="/opt/teleo-eval/secrets/openrouter-key"

# Keep a copy of a file that is about to be overwritten so hand edits (for
# example a Telegram bot token added to config.yaml) survive a re-run.
backup_existing() {
    if [ -f "$1" ]; then
        cp -p "$1" "$1.bak"
        echo " Backed up existing $1 to $1.bak"
    fi
}

echo "=== Hermes Agent Install for Teleo VPS ==="

# 1. Check prereqs (presence only; versions are not verified here)
echo "[1/6] Checking prerequisites..."
python3 --version || { echo "ERROR: Python 3.11+ required"; exit 1; }
node --version || { echo "ERROR: Node.js 22+ required"; exit 1; }
git --version || { echo "ERROR: git required"; exit 1; }

# 2. Install Hermes
echo "[2/6] Installing Hermes Agent..."
if command -v hermes &>/dev/null; then
    echo "Hermes already installed, upgrading..."
    pip3 install --upgrade hermes-agent
else
    curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
    # Source the updated PATH
    export PATH="$HOME/.local/bin:$PATH"
fi

# 3. Create config directory and write .env with the OpenRouter key
#    (read from the existing pipeline secret)
echo "[3/6] Setting up config..."
mkdir -p "$HERMES_HOME"

ENV_FILE="$HERMES_HOME/.env"
if [ -f "$OPENROUTER_KEY_FILE" ]; then
    OPENROUTER_KEY=$(<"$OPENROUTER_KEY_FILE")
    # Build the new .env in a private temp file, then move it into place, so
    # the key is never written to a file with wider permissions.
    ENV_TMP=$(mktemp "$HERMES_HOME/.env.XXXXXX")
    if [ -f "$ENV_FILE" ]; then
        # Keep every other entry (e.g. GOOGLE_CLIENT_ID); grep exits 1 when
        # nothing is left to copy, which is not an error here.
        grep -v '^OPENROUTER_API_KEY=' "$ENV_FILE" > "$ENV_TMP" || true
    fi
    printf 'OPENROUTER_API_KEY=%s\n' "$OPENROUTER_KEY" >> "$ENV_TMP"
    chmod 600 "$ENV_TMP"
    mv "$ENV_TMP" "$ENV_FILE"
    echo " OpenRouter key loaded from pipeline secrets"
else
    echo " WARNING: No OpenRouter key found at $OPENROUTER_KEY_FILE"
    echo " You'll need to manually add OPENROUTER_API_KEY to $HERMES_HOME/.env"
fi

# 4. Write config.yaml
echo "[4/6] Writing config.yaml..."
backup_existing "$HERMES_HOME/config.yaml"
cat > "$HERMES_HOME/config.yaml" << 'EOF'
# Hermes Agent config — Teleo VPS
model:
  provider: openrouter
  default: anthropic/claude-sonnet-4-6
  smart_routing: true
  smart_routing_model: google/gemini-2.5-flash

terminal:
  backend: native

memory:
  enabled: true
  search: sqlite_fts5

tools:
  web_search: true
  browser: true
  file_ops: true
  terminal: true
  vision: false
  image_gen: false
  tts: false

gateway:
  telegram:
    enabled: false # Enable after setting BOT_TOKEN below
    # bot_token: "YOUR_TELEGRAM_BOT_TOKEN"
EOF

# 5. Write SOUL.md
echo "[5/6] Writing SOUL.md..."
backup_existing "$HERMES_HOME/SOUL.md"
cat > "$HERMES_HOME/SOUL.md" << 'EOF'
You are Cory's personal AI assistant running on the Teleo VPS.

Your owner is Cory Abdalla — founder of Metaversal, building LivingIP
(a collective intelligence system for investment research).

You help with:
- Email triage and drafting (when Gmail is connected)
- Calendar management
- Web research and summarization
- Quick tasks and reminders
- Anything Cory asks

Style: Direct, concise, no fluff. Cory is technical — skip explanations
of basic concepts. When uncertain, say so rather than guessing.

You are NOT part of the LivingIP pipeline. You're a separate personal
assistant. Don't try to interact with Forgejo, pipeline.db, or the
teleo-codex unless Cory specifically asks.
EOF

# 6. Summary
echo "[6/6] Done!"
echo ""
echo "=== Next Steps ==="
echo "1. Test: hermes 'hello, what model are you using?'"
echo "2. Gmail: hermes setup google-workspace (needs OAuth credentials)"
echo "3. Telegram: Create bot via @BotFather, add token to config.yaml,"
echo " then: hermes gateway start"
echo "4. Cron: hermes cron add '0 8 * * *' 'Check my calendar and summarize today'"
echo ""
echo "Config: $HERMES_HOME/config.yaml"
echo "Memory: $HERMES_HOME/MEMORY.md"
echo "Skills: $HERMES_HOME/skills/"
|
||||
|
|
@ -21,6 +21,92 @@ logger = logging.getLogger("pipeline.attribution")
|
|||
|
||||
VALID_ROLES = frozenset({"sourcer", "extractor", "challenger", "synthesizer", "reviewer"})
|
||||
|
||||
# Agent-owned branch prefixes — PRs from these branches get Pentagon-Agent trailer
|
||||
# credit for challenger/synthesizer roles. Pipeline-infra branches (extract/ reweave/
|
||||
# fix/ ingestion/) are deliberately excluded: they're automation, not contribution.
|
||||
# Single source of truth; imported by contributor.py and backfill-events.py.
|
||||
AGENT_BRANCH_PREFIXES = (
|
||||
"rio/", "theseus/", "leo/", "vida/", "astra/", "clay/", "oberon/",
|
||||
)
|
||||
|
||||
# Handle sanity: lowercase alphanumerics, hyphens, underscores. 1-39 chars (matches
|
||||
# GitHub's handle rules). Rejects garbage like "governance---meritocratic-voting-+-futarchy"
|
||||
# or "sec-interpretive-release-s7-2026-09-(march-17" that upstream frontmatter hygiene
|
||||
# bugs produce. Apply at parse time so bad handles never reach the contributors table.
|
||||
_HANDLE_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,38}$")
|
||||
|
||||
|
||||
def _valid_handle(handle: str) -> bool:
|
||||
"""Return True if handle matches the handle format (alphanum + _-, ≤39 chars)."""
|
||||
if not handle or not isinstance(handle, str):
|
||||
return False
|
||||
h = handle.strip().lower().lstrip("@")
|
||||
if h.endswith("-") or h.endswith("_"):
|
||||
return False
|
||||
return bool(_HANDLE_RE.match(h))
|
||||
|
||||
|
||||
def _filter_valid_handles(result: dict) -> dict:
    """Return a copy of a parsed attribution dict without bad-handle entries.

    The output always has one list per role in ``VALID_ROLES``; an entry is
    kept only when its ``handle`` passes ``_valid_handle``.
    """
    kept: dict[str, list[dict]] = {}
    for role in VALID_ROLES:
        kept[role] = []
    for role, entries in result.items():
        bucket = kept[role]
        bucket.extend(e for e in entries if _valid_handle(e.get("handle", "")))
    return kept
|
||||
|
||||
|
||||
# ─── Handle normalization + kind classification (schema v24) ──────────────
|
||||
|
||||
# Known Pentagon agents. Used to classify contributor kind='agent' so the
|
||||
# leaderboard can filter them out of the default person view.
|
||||
PENTAGON_AGENTS = frozenset({
|
||||
"rio", "leo", "theseus", "vida", "clay", "astra",
|
||||
"oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
|
||||
"pipeline", # pipeline-owned commits (extract/*, reweave/*, fix/*)
|
||||
})
|
||||
|
||||
|
||||
def normalize_handle(handle: str, conn=None) -> str:
    """Return the canonical form of a contributor handle.

    The handle is trimmed, lowercased and stripped of leading ``@``. When a
    database connection is supplied, the result is additionally looked up in
    ``contributor_aliases`` and replaced by its ``canonical`` value on a hit.

    Examples:
        '@thesensatore' → 'thesensatore'
        'Cameron' → 'cameron' → 'cameron-s1' (via alias if seeded)
        'CNBC' → 'cnbc'

    A falsy ``handle`` yields ``""``. Alias resolution needs ``conn`` (not
    always available at parse time; the merge-time writer passes it).
    """
    if not handle:
        return ""
    bare = handle.strip().lower().lstrip("@")
    if conn is not None:
        try:
            hit = conn.execute(
                "SELECT canonical FROM contributor_aliases WHERE alias = ?", (bare,),
            ).fetchone()
            if hit:
                # Rows may be mapping-like (dict / keyed row) or plain tuples.
                by_name = isinstance(hit, dict) or hasattr(hit, "keys")
                return hit["canonical"] if by_name else hit[0]
        except Exception:
            # Alias table might not exist yet on pre-v24 DBs — degrade gracefully.
            logger.debug("normalize_handle: alias lookup failed for %r", bare, exc_info=True)
    return bare
|
||||
|
||||
|
||||
def classify_kind(handle: str) -> str:
    """Return 'agent' for known Pentagon agents, 'person' otherwise.

    The 'org' kind (CNBC, SpaceNews, etc.) is assigned by operator review,
    not inferred here. Keeping heuristics narrow: we know our own agents;
    everything else defaults to person until explicitly classified.

    An empty, ``None`` or non-string handle is classified as 'person' rather
    than raising, matching how the other handle helpers treat such input.
    """
    if not handle or not isinstance(handle, str):
        return "person"
    h = handle.strip().lower().lstrip("@")
    return "agent" if h in PENTAGON_AGENTS else "person"
|
||||
|
||||
|
||||
# ─── Parse attribution from claim content ──────────────────────────────────
|
||||
|
||||
|
|
@ -51,7 +137,11 @@ def parse_attribution(fm: dict) -> dict[str, list[dict]]:
|
|||
elif isinstance(entries, str):
|
||||
# Single entry as string
|
||||
result[role].append({"handle": entries.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
return result
|
||||
# Fall through to the filter at the end (don't early-return). The nested
|
||||
# block path was skipping the handle sanity filter, letting garbage like
|
||||
# "senator-elissa-slotkin-/-the-hill" through when it was written into
|
||||
# frontmatter during the legacy-fallback era.
|
||||
return _filter_valid_handles(result)
|
||||
|
||||
# Flat format fallback (attribution_sourcer, attribution_extractor, etc.)
|
||||
for role in VALID_ROLES:
|
||||
|
|
@ -64,22 +154,40 @@ def parse_attribution(fm: dict) -> dict[str, list[dict]]:
|
|||
if isinstance(v, str):
|
||||
result[role].append({"handle": v.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
|
||||
# Legacy fallback: infer from source field
|
||||
if not any(result[r] for r in VALID_ROLES):
|
||||
source = fm.get("source", "")
|
||||
if isinstance(source, str) and source:
|
||||
# Try to extract author handle from source string
|
||||
# Patterns: "@handle", "Author Name", "org, description"
|
||||
handle_match = re.search(r"@(\w+)", source)
|
||||
if handle_match:
|
||||
result["sourcer"].append({"handle": handle_match.group(1).lower(), "agent_id": None, "context": source})
|
||||
else:
|
||||
# Use first word/phrase before comma as sourcer handle
|
||||
author = source.split(",")[0].strip().lower().replace(" ", "-")
|
||||
if author and len(author) > 1:
|
||||
result["sourcer"].append({"handle": author, "agent_id": None, "context": source})
|
||||
# Bare-key flat format: `sourcer: alexastrum`, `extractor: leo`, etc.
|
||||
# This is what extract.py writes (line 290: f'sourcer: "{sourcer}"') — the most
|
||||
# common format in practice (~42% of claim files). The Apr 24 incident traced
|
||||
# missing leaderboard entries to this format being silently dropped because the
|
||||
# parser only checked the `attribution_*` prefix.
|
||||
# Only fill if the role wasn't already populated by the prefixed form, to avoid
|
||||
# double-counting when both formats coexist on the same claim.
|
||||
for role in VALID_ROLES:
|
||||
if result[role]:
|
||||
continue
|
||||
bare_val = fm.get(role)
|
||||
if isinstance(bare_val, str) and bare_val.strip():
|
||||
result[role].append({"handle": bare_val.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
elif isinstance(bare_val, list):
|
||||
for v in bare_val:
|
||||
if isinstance(v, str) and v.strip():
|
||||
result[role].append({"handle": v.strip().lower().lstrip("@"), "agent_id": None, "context": None})
|
||||
elif isinstance(v, dict) and v.get("handle"):
|
||||
result[role].append({
|
||||
"handle": v["handle"].strip().lower().lstrip("@"),
|
||||
"agent_id": v.get("agent_id"),
|
||||
"context": v.get("context"),
|
||||
})
|
||||
|
||||
return result
|
||||
# Legacy `source` heuristic REMOVED (Ganymede review, Apr 24). It fabricated
|
||||
# handles from descriptive source strings — "governance---meritocratic-voting-+-
|
||||
# futarchy", "cameron-(contributor)", "sec-interpretive-release-s7-2026-09-
|
||||
# (march-17". Hit rate on real handles was near-zero, false-positive rate was
|
||||
# high. Claims without explicit attribution now return empty (better surface as
|
||||
# data hygiene than invent fake contributors).
|
||||
|
||||
# Filter to valid handles only. Bad handles (garbage from upstream frontmatter
|
||||
# bugs) get dropped rather than written to the contributors table.
|
||||
return _filter_valid_handles(result)
|
||||
|
||||
|
||||
def parse_attribution_from_file(filepath: str) -> dict[str, list[dict]]:
|
||||
|
|
@ -100,12 +208,15 @@ def parse_attribution_from_file(filepath: str) -> dict[str, list[dict]]:
|
|||
# ─── Validate attribution ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def validate_attribution(fm: dict) -> list[str]:
|
||||
def validate_attribution(fm: dict, agent: str | None = None) -> list[str]:
|
||||
"""Validate attribution block in claim frontmatter.
|
||||
|
||||
Returns list of issues. Block on missing extractor, warn on missing sourcer.
|
||||
(Leo: extractor is always known, sourcer is best-effort.)
|
||||
|
||||
If agent is provided and extractor is missing, auto-fix by setting the
|
||||
agent as extractor (same pattern as created-date auto-fix).
|
||||
|
||||
Only validates if an attribution block is explicitly present. Legacy claims
|
||||
without attribution blocks are not blocked — they'll get attribution when
|
||||
enriched. New claims from v2 extraction always have attribution.
|
||||
|
|
@ -123,7 +234,16 @@ def validate_attribution(fm: dict) -> list[str]:
|
|||
attribution = parse_attribution(fm)
|
||||
|
||||
if not attribution["extractor"]:
|
||||
issues.append("missing_attribution_extractor")
|
||||
if agent:
|
||||
# Auto-fix: set the processing agent as extractor
|
||||
attr = fm.get("attribution")
|
||||
if isinstance(attr, dict):
|
||||
attr["extractor"] = [{"handle": agent}]
|
||||
else:
|
||||
fm["attribution"] = {"extractor": [{"handle": agent}]}
|
||||
issues.append("fixed_missing_extractor")
|
||||
else:
|
||||
issues.append("missing_attribution_extractor")
|
||||
|
||||
return issues
|
||||
|
||||
|
|
|
|||
282
lib/cascade.py
Normal file
282
lib/cascade.py
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
"""Cascade automation — auto-flag dependent beliefs/positions when claims change.
|
||||
|
||||
Hook point: called from merge.py after _embed_merged_claims, before _delete_remote_branch.
|
||||
Uses the same main_sha/branch_sha diff to detect changed claim files, then scans
|
||||
all agent beliefs and positions for depends_on references to those claims.
|
||||
|
||||
Notifications are written to /opt/teleo-eval/agent-state/{agent}/inbox/ using
|
||||
the same atomic-write pattern as lib-state.sh.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import secrets
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("pipeline.cascade")
|
||||
|
||||
AGENT_STATE_DIR = Path("/opt/teleo-eval/agent-state")
|
||||
CLAIM_DIRS = {"domains/", "core/", "foundations/", "decisions/"}
|
||||
AGENT_NAMES = ["rio", "leo", "clay", "astra", "vida", "theseus"]
|
||||
|
||||
|
||||
def _extract_claim_titles_from_diff(diff_files: list[str]) -> set[str]:
|
||||
"""Extract claim titles from changed file paths."""
|
||||
titles = set()
|
||||
for fpath in diff_files:
|
||||
if not fpath.endswith(".md"):
|
||||
continue
|
||||
if not any(fpath.startswith(d) for d in CLAIM_DIRS):
|
||||
continue
|
||||
basename = os.path.basename(fpath)
|
||||
if basename.startswith("_") or basename == "directory.md":
|
||||
continue
|
||||
title = basename.removesuffix(".md")
|
||||
titles.add(title)
|
||||
return titles
|
||||
|
||||
|
||||
def _normalize_for_match(text: str) -> str:
|
||||
"""Normalize for fuzzy matching: lowercase, hyphens to spaces, strip punctuation, collapse whitespace."""
|
||||
text = text.lower().strip()
|
||||
text = text.replace("-", " ")
|
||||
text = re.sub(r"[^\w\s]", "", text)
|
||||
text = re.sub(r"\s+", " ", text)
|
||||
return text
|
||||
|
||||
|
||||
def _slug_to_words(slug: str) -> str:
|
||||
"""Convert kebab-case slug to space-separated words."""
|
||||
return slug.replace("-", " ")
|
||||
|
||||
|
||||
def _parse_depends_on(file_path: Path) -> tuple[str, list[str]]:
|
||||
"""Parse a belief or position file's depends_on entries.
|
||||
|
||||
Returns (agent_name, [dependency_titles]).
|
||||
"""
|
||||
try:
|
||||
content = file_path.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
return ("", [])
|
||||
|
||||
agent = ""
|
||||
deps = []
|
||||
in_frontmatter = False
|
||||
in_depends = False
|
||||
|
||||
for line in content.split("\n"):
|
||||
if line.strip() == "---":
|
||||
if not in_frontmatter:
|
||||
in_frontmatter = True
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
if in_frontmatter:
|
||||
if line.startswith("agent:"):
|
||||
agent = line.split(":", 1)[1].strip().strip('"').strip("'")
|
||||
elif line.startswith("depends_on:"):
|
||||
in_depends = True
|
||||
rest = line.split(":", 1)[1].strip()
|
||||
if rest.startswith("["):
|
||||
items = re.findall(r'"([^"]+)"|\'([^\']+)\'', rest)
|
||||
for item in items:
|
||||
dep = item[0] or item[1]
|
||||
dep = dep.strip("[]").replace("[[", "").replace("]]", "")
|
||||
deps.append(dep)
|
||||
in_depends = False
|
||||
elif in_depends:
|
||||
if line.startswith(" - "):
|
||||
dep = line.strip().lstrip("- ").strip('"').strip("'")
|
||||
dep = dep.replace("[[", "").replace("]]", "")
|
||||
deps.append(dep)
|
||||
elif line.strip() and not line.startswith(" "):
|
||||
in_depends = False
|
||||
|
||||
# Also scan body for [[wiki-links]]
|
||||
body_links = re.findall(r"\[\[([^\]]+)\]\]", content)
|
||||
for link in body_links:
|
||||
if link not in deps:
|
||||
deps.append(link)
|
||||
|
||||
return (agent, deps)
|
||||
|
||||
|
||||
def _write_inbox_message(agent: str, subject: str, body: str) -> bool:
    """Write a cascade notification to an agent's inbox. Atomic tmp+rename.

    The message is written to a temporary file inside the inbox directory and
    then renamed to ``cascade-<timestamp>-<nonce>-<subject>.md``. If anything
    fails, the temporary file is removed so no ``.tmp`` debris accumulates.

    Args:
        agent: Agent name; selects ``AGENT_STATE_DIR/<agent>/inbox``.
        subject: Subject line; its first 60 characters (with path separators
            and control characters replaced by ``-``) go into the file name.
        body: Markdown body written after the front matter.

    Returns:
        True when the message file was created; False when the inbox
        directory does not exist or an OS error occurred.
    """
    inbox_dir = AGENT_STATE_DIR / agent / "inbox"
    if not inbox_dir.exists():
        logger.warning("cascade: no inbox dir for agent %s, skipping", agent)
        return False

    now = datetime.now(timezone.utc)
    ts = now.strftime("%Y%m%d-%H%M%S")
    nonce = secrets.token_hex(3)
    # A "/" in the subject would otherwise point the rename at another directory.
    safe_subject = re.sub(r"[/\\\x00-\x1f]", "-", subject[:60])
    final_path = inbox_dir / f"cascade-{ts}-{nonce}-{safe_subject}.md"
    # Keep the quoted YAML scalar well-formed if the subject contains quotes.
    yaml_subject = subject.replace("\\", "\\\\").replace('"', '\\"')

    tmp_path = None
    try:
        fd, tmp_path = tempfile.mkstemp(dir=str(inbox_dir), suffix=".tmp")
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write("---\n")
            f.write("type: cascade\n")
            f.write("from: pipeline\n")
            f.write(f"to: {agent}\n")
            f.write(f"subject: \"{yaml_subject}\"\n")
            f.write(f"created: {now.isoformat()}\n")
            f.write("status: unread\n")
            f.write("---\n\n")
            f.write(body)
        os.replace(tmp_path, str(final_path))
        return True
    except OSError:
        logger.exception("cascade: failed to write inbox message for %s", agent)
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; the original failure is already logged.
                pass
        return False
|
||||
|
||||
|
||||
def _find_matches(deps: list[str], claim_lookup: dict[str, str]) -> list[str]:
|
||||
"""Check if any dependency matches a changed claim.
|
||||
|
||||
Uses exact normalized match first, then substring containment for longer
|
||||
strings only (min 15 chars) to avoid false positives on short generic names.
|
||||
"""
|
||||
matched = []
|
||||
for dep in deps:
|
||||
norm = _normalize_for_match(dep)
|
||||
if norm in claim_lookup:
|
||||
matched.append(claim_lookup[norm])
|
||||
else:
|
||||
# Substring match only for sufficiently specific strings
|
||||
shorter = min(len(norm), min((len(k) for k in claim_lookup), default=0))
|
||||
if shorter >= 15:
|
||||
for claim_norm, claim_orig in claim_lookup.items():
|
||||
if claim_norm in norm or norm in claim_norm:
|
||||
matched.append(claim_orig)
|
||||
break
|
||||
return matched
|
||||
|
||||
|
||||
def _format_cascade_body(
|
||||
file_name: str,
|
||||
file_type: str,
|
||||
matched_claims: list[str],
|
||||
pr_num: int,
|
||||
) -> str:
|
||||
"""Format the cascade notification body."""
|
||||
claims_list = "\n".join(f"- {c}" for c in matched_claims)
|
||||
return (
|
||||
f"# Cascade: upstream claims changed\n\n"
|
||||
f"Your {file_type} **{file_name}** depends on claims that were modified in PR #{pr_num}.\n\n"
|
||||
f"## Changed claims\n\n{claims_list}\n\n"
|
||||
f"## Action needed\n\n"
|
||||
f"Review whether your {file_type}'s confidence, description, or grounding "
|
||||
f"needs updating in light of these changes. If the evidence strengthened, "
|
||||
f"consider increasing confidence. If it weakened or contradicted, flag for "
|
||||
f"re-evaluation.\n"
|
||||
)
|
||||
|
||||
|
||||
async def cascade_after_merge(
    main_sha: str,
    branch_sha: str,
    pr_num: int,
    main_worktree: Path,
    conn=None,
) -> int:
    """Scan for beliefs/positions affected by claims changed in this merge.

    Runs ``git diff --name-only`` between ``main_sha`` and ``branch_sha`` inside
    ``main_worktree``, extracts claim titles from the changed paths, then checks
    every ``agents/<agent>/{beliefs,positions}/*.md`` file's dependencies against
    those claims. One inbox message is written per affected file.

    Args:
        main_sha: First revision passed to ``git diff``.
        branch_sha: Second revision passed to ``git diff``.
        pr_num: PR number; used in the message body, log lines and audit row.
        main_worktree: Checkout used as the git cwd and as the root of ``agents/``.
        conn: Optional DB connection. When given, a ``cascade_triggered`` row is
            inserted into ``audit_log``; a failure there is logged, not raised.

    Returns the number of cascade notifications sent.
    """
    # 1. Get changed files
    # ACMR = added, copied, modified, renamed; deletions are not considered.
    proc = await asyncio.create_subprocess_exec(
        "git", "diff", "--name-only", "--diff-filter=ACMR",
        main_sha, branch_sha,
        cwd=str(main_worktree),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
    except asyncio.TimeoutError:
        # Kill and reap the child so it does not linger after the timeout.
        proc.kill()
        await proc.wait()
        logger.warning("cascade: git diff timed out")
        return 0

    if proc.returncode != 0:
        logger.warning("cascade: git diff failed (rc=%d)", proc.returncode)
        return 0

    diff_files = [f for f in stdout.decode().strip().split("\n") if f]

    # 2. Extract claim titles from changed files
    changed_claims = _extract_claim_titles_from_diff(diff_files)
    if not changed_claims:
        return 0

    logger.info("cascade: %d claims changed in PR #%d: %s",
                len(changed_claims), pr_num, list(changed_claims)[:5])

    # Build normalized lookup for fuzzy matching
    # Each claim is registered under two keys: its normalized raw form and the
    # normalized output of _slug_to_words(claim).
    claim_lookup = {}
    for claim in changed_claims:
        claim_lookup[_normalize_for_match(claim)] = claim
        claim_lookup[_normalize_for_match(_slug_to_words(claim))] = claim

    # 3. Scan all beliefs and positions
    notifications = 0
    notification_details = []  # Per-agent reasoning for audit trail
    agents_dir = main_worktree / "agents"
    if not agents_dir.exists():
        logger.warning("cascade: no agents/ dir in worktree")
        return 0

    for agent_name in AGENT_NAMES:
        agent_dir = agents_dir / agent_name
        if not agent_dir.exists():
            continue

        for subdir, file_type in [("beliefs", "belief"), ("positions", "position")]:
            target_dir = agent_dir / subdir
            if not target_dir.exists():
                continue
            for md_file in target_dir.glob("*.md"):
                _, deps = _parse_depends_on(md_file)
                matched = _find_matches(deps, claim_lookup)
                if matched:
                    body = _format_cascade_body(md_file.name, file_type, matched, pr_num)
                    # Only count (and audit) notifications whose inbox write succeeded.
                    if _write_inbox_message(agent_name, f"claim-changed-affects-{file_type}", body):
                        notifications += 1
                        notification_details.append({
                            "agent": agent_name,
                            "file_type": file_type,
                            "file": md_file.stem,
                            "matched_claims": matched,
                        })
                        logger.info("cascade: notified %s — %s '%s' affected by %s",
                                    agent_name, file_type, md_file.stem, matched)

    if notifications:
        logger.info("cascade: sent %d notifications for PR #%d", notifications, pr_num)

    # Write structured audit_log entry for cascade tracking (Page 4 data)
    # NOTE(review): nesting reconstructed from a flattened source; this is
    # assumed to run even when no notification was sent. Confirm.
    if conn is not None:
        try:
            conn.execute(
                "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
                ("cascade", "cascade_triggered", json.dumps({
                    "pr": pr_num,
                    # Payload is capped: at most 20 claims and 50 detail rows.
                    "claims_changed": list(changed_claims)[:20],
                    "notifications_sent": notifications,
                    "details": notification_details[:50],
                })),
            )
        except Exception:
            logger.exception("cascade: audit_log write failed (non-fatal)")

    return notifications
|
||||
|
|
@ -156,13 +156,13 @@ CONTRIBUTOR_TIER_RULES = {
|
|||
},
|
||||
}
|
||||
|
||||
# Role weights for CI computation (must match schemas/contribution-weights.yaml)
|
||||
# Role weights for CI computation (must match core/contribution-architecture.md)
|
||||
CONTRIBUTION_ROLE_WEIGHTS = {
|
||||
"challenger": 0.35,
|
||||
"synthesizer": 0.25,
|
||||
"reviewer": 0.20,
|
||||
"sourcer": 0.15,
|
||||
"extractor": 0.40,
|
||||
"challenger": 0.20,
|
||||
"synthesizer": 0.15,
|
||||
"reviewer": 0.10,
|
||||
"extractor": 0.05,
|
||||
}
|
||||
|
||||
# --- Circuit breakers ---
|
||||
|
|
@ -200,6 +200,15 @@ MERGE_INTERVAL = 30
|
|||
FIX_INTERVAL = 60
|
||||
HEALTH_CHECK_INTERVAL = 60
|
||||
|
||||
# --- Extraction gates ---
|
||||
EXTRACTION_COOLDOWN_HOURS = 4 # Skip sources with any PR activity in this window. Defense-in-depth for DB-status filter.
|
||||
|
||||
# --- Retrieval (Telegram bot) ---
|
||||
RETRIEVAL_RRF_K = 20 # RRF smoothing constant — tuned for 5-10 results per source
|
||||
RETRIEVAL_ENTITY_BOOST = 1.5 # RRF score multiplier for claims wiki-linked from matched entities
|
||||
RETRIEVAL_MAX_RESULTS = 10 # Max claims shown to LLM after RRF merge
|
||||
RETRIEVAL_MIN_CLAIM_SCORE = 3.0 # Floor for keyword claim scoring — filters single-stopword matches
|
||||
|
||||
# --- Health API ---
|
||||
HEALTH_PORT = 8080
|
||||
|
||||
|
|
|
|||
201
lib/connect.py
Normal file
201
lib/connect.py
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
"""Atomic extract-and-connect — wire new claims to the KB at extraction time.
|
||||
|
||||
After extraction writes claim files to disk, this module:
|
||||
1. Embeds each new claim (title + description + body snippet)
|
||||
2. Searches Qdrant for semantically similar existing claims
|
||||
3. Adds found neighbors as `related` edges on the NEW claim's frontmatter
|
||||
|
||||
Key design decision: edges are written on the NEW claim, not on existing claims.
|
||||
Writing on existing claims would cause merge conflicts (same reason entities are
|
||||
queued, not written on branches). When the PR merges, embed-on-merge adds the
|
||||
new claim to Qdrant, and reweave can later add reciprocal edges on neighbors.
|
||||
|
||||
Cost: ~$0.0001 per claim (embedding only). No LLM classification — defaults to
|
||||
"related". Reweave handles supports/challenges classification in a separate pass.
|
||||
|
||||
Owner: Epimetheus
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("pipeline.connect")
|
||||
|
||||
# Similarity threshold for auto-connecting — below reweave's 0.70 but above
|
||||
# the noise floor (~0.55). "related" still means actually related, not vaguely topical.
|
||||
CONNECT_THRESHOLD = 0.65
|
||||
CONNECT_MAX_NEIGHBORS = 5
|
||||
|
||||
# --- Import search functions ---
|
||||
# This module is called from openrouter-extract-v2.py which may not have lib/ on path
|
||||
# via the package, so handle both import paths.
|
||||
try:
|
||||
from .search import embed_query, search_qdrant
|
||||
from .post_extract import parse_frontmatter, _rebuild_content
|
||||
except ImportError:
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
from search import embed_query, search_qdrant
|
||||
from post_extract import parse_frontmatter, _rebuild_content
|
||||
|
||||
|
||||
def _build_search_text(content: str) -> str:
    """Build the text that gets embedded for a claim.

    The parts, joined by single spaces in this order, are:
      1. the frontmatter ``description`` (surrounding quotes stripped),
      2. the first H1 title found in the body,
      3. up to 500 characters of body text, with that H1 line removed and
         everything from the first ``---`` separator line onwards dropped.

    Returns an empty string when none of the parts is available.
    """
    fm, body = parse_frontmatter(content)
    parts = []

    if fm:
        desc = fm.get("description", "")
        if isinstance(desc, str) and desc:
            parts.append(desc.strip('"').strip("'"))

    if body:
        # Get H1 title from body
        h1_match = re.search(r"^# (.+)$", body, re.MULTILINE)
        if h1_match:
            parts.append(h1_match.group(1).strip())

        # Add body snippet (skip H1 line). MULTILINE + count=1 removes the
        # same first H1 that the search above found, even when the body
        # starts with blank lines; otherwise the title would be embedded
        # twice and eat into the 500-char snippet budget.
        body_text = re.sub(
            r"^# .+\n*", "", body, count=1, flags=re.MULTILINE
        ).strip()
        # Stop at the first horizontal rule (trailing sections such as
        # "Relevant Notes" or "Topics" follow it).
        body_text = re.split(r"\n---\n", body_text)[0].strip()
        if body_text:
            parts.append(body_text[:500])

    return " ".join(parts)
|
||||
|
||||
|
||||
def _add_related_edges(claim_path: str, neighbor_slugs: list[str]) -> bool:
    """Add ``related`` edges to a claim's frontmatter.

    Slugs already present in ``related`` (compared case-insensitively after
    stripping whitespace) and blank slugs are skipped. The file is rewritten
    only when at least one new edge is added.

    Args:
        claim_path: Path of the claim file to update in place.
        neighbor_slugs: Candidate slugs to append to ``related``.

    Returns:
        True if the file was modified. False if nothing was added, the file
        has no parseable frontmatter, or it could not be read or written
        (I/O failures are logged as warnings, not raised).
    """
    try:
        with open(claim_path) as f:
            content = f.read()
    except Exception as e:
        logger.warning("Cannot read %s: %s", claim_path, e)
        return False

    fm, body = parse_frontmatter(content)
    if fm is None:
        return False

    # Normalize the existing value to a list; frontmatter may hold a single
    # string, or something unusable (treated as no edges).
    existing = fm.get("related", [])
    if isinstance(existing, str):
        existing = [existing]
    elif not isinstance(existing, list):
        existing = []

    seen = {str(e).strip().lower() for e in existing}

    added = []
    for slug in neighbor_slugs:
        key = slug.strip().lower()
        if not key or key in seen:
            continue
        added.append(slug)
        seen.add(key)

    if not added:
        return False

    fm["related"] = existing + added

    # Rebuild and write. A write failure is handled like a read failure so
    # one bad file cannot abort the caller's whole batch.
    new_content = _rebuild_content(fm, body)
    try:
        with open(claim_path, "w") as f:
            f.write(new_content)
    except OSError as e:
        logger.warning("Cannot write %s: %s", claim_path, e)
        return False

    return True
|
||||
|
||||
|
||||
def _strip_md_suffix(name: str) -> str:
    """Return *name* without a trailing ``.md`` (other occurrences are kept)."""
    return name[:-3] if name.endswith(".md") else name


def connect_new_claims(
    claim_paths: list[str],
    threshold: float = CONNECT_THRESHOLD,
    max_neighbors: int = CONNECT_MAX_NEIGHBORS,
) -> dict:
    """Connect newly-written claims to the existing KB via vector search.

    For each claim file: build its search text, embed it, query Qdrant for
    neighbors at or above ``threshold``, and append the neighbors' filename
    stems as ``related`` edges on the claim itself.

    Args:
        claim_paths: List of file paths to newly-written claim files.
        threshold: Minimum cosine similarity for connection.
        max_neighbors: Maximum edges to add per claim.

    Returns:
        {
            "total": int,
            "connected": int,
            "edges_added": int,
            "skipped_embed_failed": int,
            "skipped_no_neighbors": int,
            "connections": [{"claim": str, "neighbors": [str]}],
        }

        Unreadable files are logged and skipped without touching a counter.
        ``edges_added`` counts the neighbors offered for each connected
        claim, which can exceed the edges actually written if some were
        already present.
    """
    stats = {
        "total": len(claim_paths),
        "connected": 0,
        "edges_added": 0,
        "skipped_embed_failed": 0,
        "skipped_no_neighbors": 0,
        "connections": [],
    }

    for claim_path in claim_paths:
        try:
            with open(claim_path) as f:
                content = f.read()
        except Exception as e:
            # Best-effort: keep processing the rest, but leave a trace.
            logger.warning("Cannot read %s: %s", claim_path, e)
            continue

        # Build search text from claim content
        search_text = _build_search_text(content)
        if not search_text or len(search_text) < 20:
            stats["skipped_no_neighbors"] += 1
            continue

        # Embed the claim
        vector = embed_query(search_text)
        if vector is None:
            stats["skipped_embed_failed"] += 1
            continue

        # Search Qdrant for neighbors (no exclusion filter — a new claim is
        # normally not in Qdrant yet; self-hits are dropped below).
        hits = search_qdrant(
            vector,
            limit=max_neighbors,
            domain=None,  # Cross-domain connections are valuable
            score_threshold=threshold,
        )

        if not hits:
            stats["skipped_no_neighbors"] += 1
            continue

        claim_name = os.path.basename(claim_path)
        own_slug = _strip_md_suffix(claim_name)

        # Extract neighbor slugs (filename stems, not titles — reciprocal edges need resolvable names)
        neighbor_slugs = []
        for hit in hits:
            payload = hit.get("payload", {})
            claim_path_qdrant = payload.get("claim_path", "")
            if not claim_path_qdrant:
                continue
            slug = _strip_md_suffix(claim_path_qdrant.rsplit("/", 1)[-1])
            # A re-extracted claim may already be indexed; never link a
            # claim to itself.
            if slug and slug != own_slug:
                neighbor_slugs.append(slug)

        if not neighbor_slugs:
            stats["skipped_no_neighbors"] += 1
            continue

        # Add edges to the new claim's frontmatter
        if _add_related_edges(claim_path, neighbor_slugs):
            stats["connected"] += 1
            stats["edges_added"] += len(neighbor_slugs)
            stats["connections"].append({
                "claim": claim_name,
                "neighbors": neighbor_slugs,
            })
            logger.info("Connected %s → %d neighbors", claim_name, len(neighbor_slugs))
        else:
            stats["skipped_no_neighbors"] += 1

    logger.info(
        "Extract-and-connect: %d/%d claims connected (%d edges added, %d embed failed, %d no neighbors)",
        stats["connected"], stats["total"], stats["edges_added"],
        stats["skipped_embed_failed"], stats["skipped_no_neighbors"],
    )

    return stats
|
||||
491
lib/contributor.py
Normal file
491
lib/contributor.py
Normal file
|
|
@ -0,0 +1,491 @@
|
|||
"""Contributor attribution — tracks who contributed what and calculates tiers.
|
||||
|
||||
Extracted from merge.py (Phase 5 decomposition). Functions:
|
||||
- is_knowledge_pr: diff classification (knowledge vs pipeline-only)
|
||||
- refine_commit_type: extract → challenge/enrich refinement from diff content
|
||||
- record_contributor_attribution: parse trailers + frontmatter, upsert contributors
|
||||
- upsert_contributor: insert/update contributor record with role counts
|
||||
- insert_contribution_event: event-sourced credit log (schema v24)
|
||||
- recalculate_tier: tier promotion based on config rules
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
from . import config, db
|
||||
from .attribution import AGENT_BRANCH_PREFIXES, classify_kind, normalize_handle
|
||||
from .forgejo import get_pr_diff
|
||||
|
||||
logger = logging.getLogger("pipeline.contributor")
|
||||
|
||||
|
||||
# ─── Event schema (v24) ───────────────────────────────────────────────────
|
||||
|
||||
# Role → CI weight, per Cory's confirmed schema (Apr 24 conversation).
|
||||
# Humans-are-always-author rule: agents never accumulate author credit;
|
||||
# evaluator (0.05) is the only agent-facing role. Internal agents still earn
|
||||
# author/challenger/synthesizer on their own autonomous research PRs but
|
||||
# surface in the kind='agent' leaderboard, not the default person view.
|
||||
ROLE_WEIGHTS = {
|
||||
"author": 0.30,
|
||||
"challenger": 0.25,
|
||||
"synthesizer": 0.20,
|
||||
"originator": 0.15,
|
||||
"evaluator": 0.05,
|
||||
}
|
||||
|
||||
|
||||
def insert_contribution_event(
    conn,
    handle: str,
    role: str,
    pr_number: int,
    *,
    claim_path: str | None = None,
    domain: str | None = None,
    channel: str | None = None,
    timestamp: str | None = None,
) -> bool:
    """Write one row to contribution_events; safe to replay.

    The handle is canonicalized with ``normalize_handle`` and its kind derived
    with ``classify_kind``. The weight comes from ``ROLE_WEIGHTS``. The insert
    uses ``INSERT OR IGNORE``, so a row the table already rejects as a
    duplicate is skipped rather than raising.

    Returns True only when a row was actually inserted. Returns False for an
    unknown role, a handle that normalizes to nothing, an ignored duplicate,
    or any database error (logged at debug level — e.g. when the table does
    not exist yet, pre-v24).
    """
    try:
        weight = ROLE_WEIGHTS[role]
    except KeyError:
        logger.warning("insert_contribution_event: unknown role %r", role)
        return False

    canonical = normalize_handle(handle, conn=conn)
    if not canonical:
        return False

    params = (
        canonical,
        classify_kind(canonical),
        role,
        weight,
        pr_number,
        claim_path,
        domain,
        channel,
        timestamp,
    )
    try:
        cursor = conn.execute(
            """INSERT OR IGNORE INTO contribution_events
               (handle, kind, role, weight, pr_number, claim_path, domain, channel, timestamp)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, COALESCE(?, datetime('now')))""",
            params,
        )
    except Exception:
        logger.debug("insert_contribution_event failed for pr=%d handle=%r role=%r",
                     pr_number, canonical, role, exc_info=True)
        return False
    return cursor.rowcount > 0
|
||||
|
||||
|
||||
def is_knowledge_pr(diff: str) -> bool:
    """Tell whether a unified diff touches any knowledge file.

    A PR counts as a knowledge PR as soon as one ``+++ b/`` or ``--- a/``
    header names a path under ``domains/``, ``core/``, ``foundations/`` or
    ``decisions/``. Mixed PRs therefore count as knowledge PRs; diffs that
    only touch other paths (inbox, entities, agents, archive, ...) do not.
    """
    knowledge_roots = ("domains/", "core/", "foundations/", "decisions/")
    header_markers = ("+++ b/", "--- a/")
    marker_len = 6  # both markers are six characters long

    return any(
        line[marker_len:].startswith(knowledge_roots)
        for line in diff.split("\n")
        if line.startswith(header_markers)
    )
|
||||
|
||||
|
||||
# Refined commit_type → contributor role credited for it.
COMMIT_TYPE_TO_ROLE = {
    "challenge": "challenger",
    "enrich": "synthesizer",
    "extract": "extractor",
    "research": "synthesizer",
    "entity": "extractor",
    "reweave": "synthesizer",
    "fix": "extractor",
}


def commit_type_to_role(commit_type: str) -> str:
    """Return the contributor role for a commit_type; unknown types map to "extractor"."""
    try:
        return COMMIT_TYPE_TO_ROLE[commit_type]
    except KeyError:
        return "extractor"
|
||||
|
||||
|
||||
def refine_commit_type(diff: str, branch_commit_type: str) -> str:
    """Refine commit_type from diff content when branch prefix is ambiguous.

    Only the 'extract' type is refined; every other branch type is returned
    unchanged. For 'extract', the unified diff is scanned and classified as:

    - "challenge": an added line contains ``challenged_by:`` or ``challenges:``
      and no new claim file is created
    - "enrich": existing claim files are modified and no new one is created
    - "extract": anything else (the default)

    Claim files are paths under ``domains/``, ``core/`` or ``foundations/``.

    Args:
        diff: Unified diff text of the PR.
        branch_commit_type: Commit type derived from the branch prefix.

    Returns:
        The refined commit type string.
    """
    if branch_commit_type != "extract":
        return branch_commit_type

    claim_roots = ("domains/", "core/", "foundations/")
    new_files = 0
    modified_files = 0
    has_challenge_edge = False
    # True between a "new file" line and the next "diff --git" header.
    current_is_new = False

    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            current_is_new = False
        elif line.startswith("new file"):
            current_is_new = True
        elif line.startswith("+++ b/"):
            if line[6:].startswith(claim_roots):
                if current_is_new:
                    new_files += 1
                else:
                    modified_files += 1
        elif line.startswith("+") and not line.startswith("+++"):
            # Added content line (any other "+++" header is excluded).
            if "challenged_by:" in line or "challenges:" in line:
                has_challenge_edge = True

    if has_challenge_edge and new_files == 0:
        return "challenge"
    if modified_files > 0 and new_files == 0:
        return "enrich"
    return "extract"
|
||||
|
||||
|
||||
async def record_contributor_attribution(conn, pr_number: int, branch: str, git_fn):
    """Record contributor attribution after a successful merge.

    Parses git trailers and claim frontmatter to identify contributors
    and their roles. Upserts into contributors table. Refines commit_type
    from diff content. Pipeline-only PRs (no knowledge files) are skipped.

    Two stores are written side by side: the legacy per-role counters (via
    ``upsert_contributor``) and the event log (via ``insert_contribution_event``).
    The steps, in order:

    1. Fetch the PR diff; return early if it is empty or not a knowledge PR.
    2. Refine ``prs.commit_type`` from the diff and persist it if it changed.
    3. Emit one ``author`` event (submitted_by → first commit's git author →
       agent branch prefix).
    4. Emit ``evaluator`` events for approving reviewers.
    5. Credit agents named in ``Pentagon-Agent`` commit trailers.
    6. Credit frontmatter attribution of newly added knowledge files.
    7. If no trailer agent was found, fall back to git commit authors, then to
       ``prs.agent``.

    Args:
        conn: DB connection used for all reads and writes.
        pr_number: Number of the merged PR.
        branch: Branch name; compared against ``origin/main`` in git calls.
        git_fn: async callable matching _git signature (for git log parsing).
    """
    from datetime import date as _date

    today = _date.today().isoformat()

    # Get the PR diff to parse claim frontmatter for attribution blocks
    diff = await get_pr_diff(pr_number)
    if not diff:
        return

    # Pipeline-only PRs (inbox, entities, agents) don't count toward CI
    if not is_knowledge_pr(diff):
        logger.info("PR #%d: pipeline-only commit — skipping CI attribution", pr_number)
        return

    # Refine commit_type from diff content (branch prefix may be too broad)
    row = conn.execute(
        "SELECT commit_type, submitted_by, domain, source_channel, leo_verdict, "
        "domain_verdict, domain_agent, merged_at FROM prs WHERE number = ?",
        (pr_number,),
    ).fetchone()
    # A missing row or empty commit_type is treated as "extract".
    branch_type = row["commit_type"] if row and row["commit_type"] else "extract"
    refined_type = refine_commit_type(diff, branch_type)
    if refined_type != branch_type:
        conn.execute("UPDATE prs SET commit_type = ? WHERE number = ?", (refined_type, pr_number))
        logger.info("PR #%d: commit_type refined %s → %s", pr_number, branch_type, refined_type)

    # Schema v24 event-sourcing context. Fetched once per PR, reused across emit sites.
    pr_domain = row["domain"] if row else None
    pr_channel = row["source_channel"] if row else None
    pr_submitted_by = row["submitted_by"] if row else None
    # Use the PR's merged_at timestamp so event time matches the actual merge.
    # If a merge retries after a crash, this keeps forward-emitted and backfilled
    # events on the same timeline. Falls back to datetime('now') in the writer.
    pr_merged_at = row["merged_at"] if row and row["merged_at"] else None

    # ── AUTHOR event (schema v24, double-write) ──
    # Humans-are-always-author rule: the human in the loop gets author credit.
    # Precedence: prs.submitted_by (set by extract.py from source proposed_by, or
    # by discover for human PRs) → git author of first commit → branch-prefix agent.
    # Pentagon-owned infra branches (extract/ reweave/ fix/ ingestion/) don't get
    # author events from branch prefix; extract/ PRs carry submitted_by from the
    # source's proposed_by field so the human who submitted gets credit via path 1.
    author_candidate: str | None = None
    if pr_submitted_by:
        author_candidate = pr_submitted_by
    else:
        # External GitHub PRs: git author of the FIRST commit on the branch is
        # the real submitter. `git log -1` would return the latest commit, which
        # mis-credits multi-commit PRs where a reviewer rebased or force-pushed.
        # Take the last line of the unreversed log (= oldest commit, since git
        # log defaults to reverse-chronological). Ganymede review, Apr 24.
        rc_author_log, author_log = await git_fn(
            "log", f"origin/main..origin/{branch}", "--no-merges",
            "--format=%an", timeout=5,
        )
        if rc_author_log == 0 and author_log.strip():
            lines = [line for line in author_log.strip().split("\n") if line.strip()]
            if lines:
                candidate = lines[-1].strip().lower()
                # Bot/infrastructure identities never receive author credit.
                if candidate and candidate not in {"teleo", "teleo-bot", "pipeline",
                                                   "github-actions[bot]", "forgejo-actions"}:
                    author_candidate = candidate
    # Agent-owned branches with no submitted_by: theseus/research-*, leo/*, etc.
    if not author_candidate and branch.startswith(AGENT_BRANCH_PREFIXES):
        # Autonomous agent PR (theseus/research-*, leo/entity-*, etc.) —
        # credit goes to the agent as author per Cory's directive.
        author_candidate = branch.split("/", 1)[0]

    if author_candidate:
        insert_contribution_event(
            conn, author_candidate, "author", pr_number,
            claim_path=None, domain=pr_domain, channel=pr_channel,
            timestamp=pr_merged_at,
        )

    # ── EVALUATOR events (schema v24) ──
    # Leo reviews every PR (STANDARD/DEEP tiers). domain_agent is the second
    # reviewer. Both earn evaluator credit (0.05) per approved PR. Skip when
    # verdict is 'request_changes' — failed review isn't contribution credit.
    if row:
        if row["leo_verdict"] == "approve":
            insert_contribution_event(
                conn, "leo", "evaluator", pr_number,
                claim_path=None, domain=pr_domain, channel=pr_channel,
                timestamp=pr_merged_at,
            )
        if row["domain_verdict"] == "approve" and row["domain_agent"]:
            dagent = row["domain_agent"].strip().lower()
            if dagent and dagent != "leo":  # don't double-credit leo
                insert_contribution_event(
                    conn, dagent, "evaluator", pr_number,
                    claim_path=None, domain=pr_domain, channel=pr_channel,
                    timestamp=pr_merged_at,
                )

    # Parse Pentagon-Agent trailer from branch commit messages
    agents_found: set[str] = set()
    # Agent-owned branches (theseus/*, rio/*, etc.) give the trailer-named agent
    # challenger/synthesizer credit based on refined commit_type. Pipeline-owned
    # branches (extract/*, reweave/*, etc.) don't — those are infra, not work.
    is_agent_branch = branch.startswith(AGENT_BRANCH_PREFIXES)
    # Commit types that produce an event; types absent here (e.g. "extract")
    # update the legacy counters only.
    _TRAILER_EVENT_ROLE = {
        "challenge": "challenger",
        "enrich": "synthesizer",
        "research": "synthesizer",
        "reweave": "synthesizer",
    }
    rc, log_output = await git_fn(
        "log", f"origin/main..origin/{branch}", "--format=%b%n%N",
        timeout=10,
    )
    if rc == 0:
        # Every trailer occurrence in the log triggers its own upsert.
        for match in re.finditer(r"Pentagon-Agent:\s*(\S+)\s*<([^>]+)>", log_output):
            agent_name = match.group(1).lower()
            agent_uuid = match.group(2)
            role = commit_type_to_role(refined_type)
            upsert_contributor(
                conn, agent_name, agent_uuid, role, today,
            )
            # Event-emit only for agent-owned branches where the trailer's agent
            # actually did the substantive work (challenger/synthesizer).
            event_role = _TRAILER_EVENT_ROLE.get(refined_type)
            if is_agent_branch and event_role:
                insert_contribution_event(
                    conn, agent_name, event_role, pr_number,
                    claim_path=None, domain=pr_domain, channel=pr_channel,
                    timestamp=pr_merged_at,
                )
            agents_found.add(agent_name)

    # Parse attribution from NEWLY ADDED knowledge files via the canonical attribution
    # parser (lib/attribution.py). The previous diff-line regex parser dropped
    # both the bare-key flat format (`sourcer: alexastrum`) and the nested
    # `attribution:` block format because it only matched `- handle: "X"` lines.
    # The Apr 24 incident traced missing leaderboard entries (alexastrum=0,
    # thesensatore=0, cameron-s1=0) directly to this parser's blind spots.
    #
    # --diff-filter=A restricts to added files only (Ganymede review): enrich and
    # challenge PRs modify existing claims, and re-crediting the existing sourcer on
    # every modification would inflate counts. The synthesizer/challenger/reviewer
    # roles for those PRs are credited via the Pentagon-Agent trailer path above.
    rc_files, files_output = await git_fn(
        "diff", "--name-only", "--diff-filter=A",
        f"origin/main...origin/{branch}", timeout=10,
    )
    if rc_files == 0 and files_output:
        from pathlib import Path
        from . import config
        from .attribution import parse_attribution_from_file

        # Files are read from the main worktree, i.e. their post-merge content.
        main_root = Path(config.MAIN_WORKTREE)
        # Match is_knowledge_pr's gate exactly. Entities/convictions are excluded
        # here because the is_knowledge_pr check earlier in this function already
        # returned for entity-only PRs — so a broader list here only matters for
        # mixed PRs where the narrower list already matches via the claim file.
        # Widening requires Cory sign-off since it would change leaderboard
        # accounting (entity-only PRs → CI credit).
        knowledge_prefixes = ("domains/", "core/", "foundations/", "decisions/")
        author_canonical = normalize_handle(author_candidate, conn=conn) if author_candidate else None
        for rel_path in files_output.strip().split("\n"):
            rel_path = rel_path.strip()
            if not rel_path.endswith(".md"):
                continue
            if not rel_path.startswith(knowledge_prefixes):
                continue
            full = main_root / rel_path
            if not full.exists():
                continue  # file removed in this PR
            attribution = parse_attribution_from_file(str(full))
            for role, entries in attribution.items():
                for entry in entries:
                    handle = entry.get("handle")
                    if handle:
                        upsert_contributor(
                            conn, handle, entry.get("agent_id"), role, today,
                        )
                        # Event-emit: only 'sourcer' frontmatter entries become
                        # originator events. 'extractor' frontmatter = infrastructure
                        # (the Sonnet extraction agent), no event. challenger/
                        # synthesizer frontmatter is extremely rare at extract time.
                        # Skip originator if same as author — avoids double-credit
                        # when someone submits their own content (self-authored).
                        if role == "sourcer":
                            origin_canonical = normalize_handle(handle, conn=conn)
                            if origin_canonical and origin_canonical != author_canonical:
                                insert_contribution_event(
                                    conn, handle, "originator", pr_number,
                                    claim_path=rel_path,
                                    domain=pr_domain, channel=pr_channel,
                                    timestamp=pr_merged_at,
                                )

    # Fallback: if no Pentagon-Agent trailer found, try git commit authors
    # NOTE: this set also excludes "m3taversal", unlike the author-event set above.
    _BOT_AUTHORS = frozenset({
        "m3taversal", "teleo", "teleo-bot", "pipeline",
        "github-actions[bot]", "forgejo-actions",
    })
    if not agents_found:
        rc_author, author_output = await git_fn(
            "log", f"origin/main..origin/{branch}", "--no-merges",
            "--format=%an", timeout=10,
        )
        if rc_author == 0 and author_output.strip():
            # One log line per commit: an author with several commits is
            # upserted once per commit.
            for author_line in author_output.strip().split("\n"):
                author_name = author_line.strip().lower()
                if author_name and author_name not in _BOT_AUTHORS:
                    role = commit_type_to_role(refined_type)
                    upsert_contributor(conn, author_name, None, role, today)
                    # Event-model parity: emit challenger/synthesizer event when
                    # the fallback credits a human/agent for that kind of work.
                    # Without this, external-contributor challenge/enrich PRs
                    # accumulate legacy counts but disappear from event-sourced
                    # leaderboards when Phase B cuts over. (Ganymede review.)
                    event_role_fb = _TRAILER_EVENT_ROLE.get(refined_type)
                    if event_role_fb:
                        insert_contribution_event(
                            conn, author_name, event_role_fb, pr_number,
                            claim_path=None, domain=pr_domain, channel=pr_channel,
                            timestamp=pr_merged_at,
                        )
                    agents_found.add(author_name)

    # Last resort: credit the agent recorded on the PR row, unless it is "external".
    if not agents_found:
        fb_row = conn.execute(
            "SELECT agent FROM prs WHERE number = ?", (pr_number,)
        ).fetchone()
        if fb_row and fb_row["agent"] and fb_row["agent"] != "external":
            pr_agent = fb_row["agent"].lower()
            role = commit_type_to_role(refined_type)
            upsert_contributor(conn, pr_agent, None, role, today)
            event_role_fb = _TRAILER_EVENT_ROLE.get(refined_type)
            if event_role_fb:
                insert_contribution_event(
                    conn, pr_agent, event_role_fb, pr_number,
                    claim_path=None, domain=pr_domain, channel=pr_channel,
                    timestamp=pr_merged_at,
                )
|
||||
|
||||
|
||||
def upsert_contributor(
    conn, handle: str, agent_id: str | None, role: str, date_str: str,
):
    """Create or update a contributor row and bump the counter for *role*.

    Roles without a matching ``<role>_count`` column are logged and ignored.
    ``claims_merged`` is incremented only for the 'extractor' and 'sourcer'
    roles. ``agent_id`` is stored only when the row is first created.
    """
    known_columns = (
        "sourcer_count", "extractor_count", "challenger_count",
        "synthesizer_count", "reviewer_count",
    )
    role_col = f"{role}_count"
    if role_col not in known_columns:
        logger.warning("Unknown contributor role: %s", role)
        return

    # role_col is checked against the fixed list above before being
    # interpolated into the SQL text; all values are bound parameters.
    already_known = conn.execute(
        "SELECT handle FROM contributors WHERE handle = ?", (handle,)
    ).fetchone()

    if not already_known:
        conn.execute(
            f"""INSERT INTO contributors (handle, agent_id, first_contribution, last_contribution, {role_col}, claims_merged)
               VALUES (?, ?, ?, ?, 1, CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END)""",
            (handle, agent_id, date_str, date_str, role),
        )
    else:
        conn.execute(
            f"""UPDATE contributors SET
                {role_col} = {role_col} + 1,
                claims_merged = claims_merged + CASE WHEN ? IN ('extractor', 'sourcer') THEN 1 ELSE 0 END,
                last_contribution = ?,
                updated_at = datetime('now')
               WHERE handle = ?""",
            (role, date_str, handle),
        )

    # Counters changed, so the tier may have changed too.
    recalculate_tier(conn, handle)
|
||||
|
||||
# Recalculate tier
|
||||
recalculate_tier(conn, handle)
|
||||
|
||||
|
||||
def recalculate_tier(conn, handle: str):
    """Re-derive a contributor's tier from config rules and persist any change.

    Tiers are checked from highest to lowest: "veteran" (all three veteran
    thresholds met), then "contributor" (claims_merged threshold met),
    otherwise "new". When the tier differs from the stored one, the row is
    updated, the change is logged, and a ``tier_change`` audit entry is
    written. Does nothing if the handle has no contributor row.
    """
    from datetime import date as _date, datetime as _dt

    record = conn.execute(
        "SELECT claims_merged, challenges_survived, first_contribution, tier FROM contributors WHERE handle = ?",
        (handle,),
    ).fetchone()
    if not record:
        return

    stored_tier = record["tier"]
    merged = record["claims_merged"] or 0
    survived = record["challenges_survived"] or 0
    first_seen = record["first_contribution"]

    # Age of the first contribution in days; stays 0 when the date is
    # missing or not in YYYY-MM-DD form.
    age_days = 0
    if first_seen:
        try:
            started = _dt.strptime(first_seen, "%Y-%m-%d").date()
            age_days = (_date.today() - started).days
        except ValueError:
            pass

    # Check veteran first (higher tier)
    veteran = config.CONTRIBUTOR_TIER_RULES["veteran"]
    qualifies_as_veteran = (
        merged >= veteran["claims_merged"]
        and age_days >= veteran["min_days_since_first"]
        and survived >= veteran["challenges_survived"]
    )
    if qualifies_as_veteran:
        computed_tier = "veteran"
    elif merged >= config.CONTRIBUTOR_TIER_RULES["contributor"]["claims_merged"]:
        computed_tier = "contributor"
    else:
        computed_tier = "new"

    if computed_tier == stored_tier:
        return

    conn.execute(
        "UPDATE contributors SET tier = ?, updated_at = datetime('now') WHERE handle = ?",
        (computed_tier, handle),
    )
    logger.info("Contributor %s: tier %s → %s", handle, stored_tier, computed_tier)
    db.audit(
        conn, "contributor", "tier_change",
        json.dumps({"handle": handle, "from": stored_tier, "to": computed_tier}),
    )
|
||||
48
lib/costs.py
48
lib/costs.py
|
|
@ -15,34 +15,55 @@ def record_usage(
|
|||
input_tokens: int = 0,
|
||||
output_tokens: int = 0,
|
||||
backend: str = "api",
|
||||
duration_ms: int = 0,
|
||||
cache_read_tokens: int = 0,
|
||||
cache_write_tokens: int = 0,
|
||||
cost_estimate_usd: float = 0.0,
|
||||
):
|
||||
"""Record usage and compute cost. Returns cost in USD.
|
||||
|
||||
backend: "max" (Claude Max subscription, free) or "api" (paid).
|
||||
Claude Max calls are tracked for volume metrics but cost $0. (Ganymede)
|
||||
"""
|
||||
if backend == "max":
|
||||
cost = 0.0
|
||||
# Always compute estimated cost from tokens × published rates
|
||||
rates = config.MODEL_COSTS.get(model)
|
||||
if rates and (input_tokens or output_tokens):
|
||||
estimated = (input_tokens * rates["input"] + output_tokens * rates["output"]) / 1000
|
||||
# Cache reads are ~90% cheaper than regular input
|
||||
if cache_read_tokens and rates:
|
||||
estimated += (cache_read_tokens * rates["input"] * 0.1) / 1000
|
||||
if cache_write_tokens and rates:
|
||||
estimated += (cache_write_tokens * rates["input"] * 1.25) / 1000
|
||||
else:
|
||||
rates = config.MODEL_COSTS.get(model)
|
||||
if not rates:
|
||||
logger.warning("No cost rates for model %s, recording zero cost", model)
|
||||
cost = 0.0
|
||||
else:
|
||||
cost = (input_tokens * rates["input"] + output_tokens * rates["output"]) / 1000
|
||||
estimated = 0.0
|
||||
# Use caller-provided estimate if we can't compute (e.g. CLI gives its own)
|
||||
if cost_estimate_usd > 0 and estimated == 0:
|
||||
estimated = cost_estimate_usd
|
||||
cost_estimate_usd = estimated
|
||||
|
||||
if backend == "max":
|
||||
cost = 0.0 # subscription — no actual spend
|
||||
else:
|
||||
cost = estimated if estimated > 0 else 0.0
|
||||
|
||||
today = date.today().isoformat()
|
||||
# Include backend in the stage key so max vs api are tracked separately
|
||||
stage_key = f"{stage}:{backend}" if backend != "api" else stage
|
||||
conn.execute(
|
||||
"""INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd)
|
||||
VALUES (?, ?, ?, 1, ?, ?, ?)
|
||||
"""INSERT INTO costs (date, model, stage, calls, input_tokens, output_tokens, cost_usd,
|
||||
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd)
|
||||
VALUES (?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT (date, model, stage) DO UPDATE SET
|
||||
calls = calls + 1,
|
||||
input_tokens = input_tokens + excluded.input_tokens,
|
||||
output_tokens = output_tokens + excluded.output_tokens,
|
||||
cost_usd = cost_usd + excluded.cost_usd""",
|
||||
(today, model, stage_key, input_tokens, output_tokens, cost),
|
||||
cost_usd = cost_usd + excluded.cost_usd,
|
||||
duration_ms = duration_ms + excluded.duration_ms,
|
||||
cache_read_tokens = cache_read_tokens + excluded.cache_read_tokens,
|
||||
cache_write_tokens = cache_write_tokens + excluded.cache_write_tokens,
|
||||
cost_estimate_usd = cost_estimate_usd + excluded.cost_estimate_usd""",
|
||||
(today, model, stage_key, input_tokens, output_tokens, cost,
|
||||
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd),
|
||||
)
|
||||
return cost
|
||||
|
||||
|
|
@ -63,7 +84,8 @@ def get_daily_breakdown(conn, day: str = None) -> list:
|
|||
if day is None:
|
||||
day = date.today().isoformat()
|
||||
rows = conn.execute(
|
||||
"""SELECT model, stage, calls, input_tokens, output_tokens, cost_usd
|
||||
"""SELECT model, stage, calls, input_tokens, output_tokens, cost_usd,
|
||||
duration_ms, cache_read_tokens, cache_write_tokens, cost_estimate_usd
|
||||
FROM costs WHERE date = ? ORDER BY cost_usd DESC""",
|
||||
(day,),
|
||||
).fetchall()
|
||||
|
|
|
|||
230
lib/cross_domain.py
Normal file
230
lib/cross_domain.py
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
"""Cross-domain citation index — detect entity overlap across domains.
|
||||
|
||||
Hook point: called from merge.py after cascade_after_merge.
|
||||
After a claim merges, checks if its referenced entities also appear in claims
|
||||
from other domains. Logs connections to audit_log for silo detection.
|
||||
|
||||
Two detection methods:
|
||||
1. Entity name matching — entity names appearing in claim body text (word-boundary)
|
||||
2. Source overlap — claims citing the same source archive files
|
||||
|
||||
At ~600 claims and ~100 entities, full scan per merge takes <1 second.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger("pipeline.cross_domain")
|
||||
|
||||
# Minimum entity name length to avoid false positives (ORE, QCX, etc)
|
||||
MIN_ENTITY_NAME_LEN = 4
|
||||
|
||||
# Entity names that are common English words — skip to avoid false positives
|
||||
ENTITY_STOPLIST = {"versus", "island", "loyal", "saber", "nebula", "helium", "coal", "snapshot", "dropout"}
|
||||
|
||||
|
||||
def _build_entity_names(worktree: Path) -> dict[str, str]:
|
||||
"""Build mapping of entity_slug -> display_name from entity files."""
|
||||
names = {}
|
||||
entity_dir = worktree / "entities"
|
||||
if not entity_dir.exists():
|
||||
return names
|
||||
for md_file in entity_dir.rglob("*.md"):
|
||||
if md_file.name.startswith("_"):
|
||||
continue
|
||||
try:
|
||||
content = md_file.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
continue
|
||||
for line in content.split("\n"):
|
||||
if line.startswith("name:"):
|
||||
name = line.split(":", 1)[1].strip().strip('"').strip("'")
|
||||
if len(name) >= MIN_ENTITY_NAME_LEN and name.lower() not in ENTITY_STOPLIST:
|
||||
names[md_file.stem] = name
|
||||
break
|
||||
return names
|
||||
|
||||
|
||||
def _compile_entity_patterns(entity_names: dict[str, str]) -> dict[str, re.Pattern]:
|
||||
"""Pre-compile word-boundary regex for each entity name."""
|
||||
patterns = {}
|
||||
for slug, name in entity_names.items():
|
||||
try:
|
||||
patterns[slug] = re.compile(r'\b' + re.escape(name) + r'\b', re.IGNORECASE)
|
||||
except re.error:
|
||||
continue
|
||||
return patterns
|
||||
|
||||
|
||||
def _extract_source_refs(content: str) -> set[str]:
|
||||
"""Extract source archive references ([[YYYY-MM-DD-...]]) from content."""
|
||||
return set(re.findall(r"\[\[(20\d{2}-\d{2}-\d{2}-[^\]]+)\]\]", content))
|
||||
|
||||
|
||||
def _find_entity_mentions(content: str, patterns: dict[str, re.Pattern]) -> set[str]:
|
||||
"""Find entity slugs whose names appear in the content (word-boundary match)."""
|
||||
found = set()
|
||||
for slug, pat in patterns.items():
|
||||
if pat.search(content):
|
||||
found.add(slug)
|
||||
return found
|
||||
|
||||
|
||||
def _scan_domain_claims(worktree: Path, patterns: dict[str, re.Pattern]) -> dict[str, list[dict]]:
|
||||
"""Build domain -> [claim_info] mapping for all claims."""
|
||||
domain_claims = {}
|
||||
domains_dir = worktree / "domains"
|
||||
if not domains_dir.exists():
|
||||
return domain_claims
|
||||
|
||||
for domain_dir in domains_dir.iterdir():
|
||||
if not domain_dir.is_dir():
|
||||
continue
|
||||
claims = []
|
||||
for claim_file in domain_dir.glob("*.md"):
|
||||
if claim_file.name.startswith("_") or claim_file.name == "directory.md":
|
||||
continue
|
||||
try:
|
||||
content = claim_file.read_text(encoding="utf-8")
|
||||
except (OSError, UnicodeDecodeError):
|
||||
continue
|
||||
claims.append({
|
||||
"slug": claim_file.stem,
|
||||
"entities": _find_entity_mentions(content, patterns),
|
||||
"sources": _extract_source_refs(content),
|
||||
})
|
||||
domain_claims[domain_dir.name] = claims
|
||||
return domain_claims
|
||||
|
||||
|
||||
async def cross_domain_after_merge(
    main_sha: str,
    branch_sha: str,
    pr_num: int,
    main_worktree: Path,
    conn=None,
) -> int:
    """Detect cross-domain entity/source overlap for claims changed in this merge.

    Args:
        main_sha: First commit passed to ``git diff``.
        branch_sha: Second commit passed to ``git diff``.
        pr_num: PR number, recorded in the audit entry and log lines.
        main_worktree: Checkout used as git's working directory and as the
            root for reading ``entities/`` and ``domains/``.
        conn: Optional DB connection; when ``None`` no audit_log row is written.

    Returns:
        The number of cross-domain connections found. This is the full count;
        the audit detail is capped at 10 claims with 10 connections each.
        Returns 0 when git fails or times out, when no claim files changed,
        or when no entity names are available.
    """
    # 1. Get changed files.
    # -z makes git emit raw NUL-terminated paths. Without it, paths containing
    # non-ASCII or special characters are C-quoted ("domains/\303\251.md") and
    # would fail the prefix/suffix checks below.
    proc = await asyncio.create_subprocess_exec(
        "git", "diff", "--name-only", "-z", "--diff-filter=ACMR",
        main_sha, branch_sha,
        cwd=str(main_worktree),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=10)
    except asyncio.TimeoutError:
        proc.kill()
        await proc.wait()
        logger.warning("cross_domain: git diff timed out")
        return 0

    if proc.returncode != 0:
        logger.warning(
            "cross_domain: git diff failed (rc=%s): %s",
            proc.returncode,
            stderr.decode("utf-8", errors="replace").strip()[:200],
        )
        return 0

    # errors="replace": an undecodable path must not abort the hook; such a
    # path simply fails to open later and is skipped.
    diff_files = [f for f in stdout.decode("utf-8", errors="replace").split("\0") if f]

    # 2. Filter to claim files: domains/<domain>/.../<slug>.md
    changed_claims = []
    for fpath in diff_files:
        if not fpath.endswith(".md") or not fpath.startswith("domains/"):
            continue
        parts = fpath.split("/")
        if len(parts) < 3:
            continue
        basename = os.path.basename(fpath)
        if basename.startswith("_") or basename == "directory.md":
            continue
        changed_claims.append({"path": fpath, "domain": parts[1], "slug": Path(basename).stem})

    if not changed_claims:
        return 0

    # 3. Build entity patterns and scan all claims
    entity_names = _build_entity_names(main_worktree)
    if not entity_names:
        return 0

    patterns = _compile_entity_patterns(entity_names)
    domain_claims = _scan_domain_claims(main_worktree, patterns)

    # 4. For each changed claim, find cross-domain connections
    total_connections = 0
    all_connections = []

    for claim in changed_claims:
        claim_path = main_worktree / claim["path"]
        try:
            content = claim_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue

        my_entities = _find_entity_mentions(content, patterns)
        my_sources = _extract_source_refs(content)

        if not my_entities and not my_sources:
            continue

        connections = []
        for other_domain, other_claims in domain_claims.items():
            # Same-domain overlap is not a cross-domain connection.
            if other_domain == claim["domain"]:
                continue
            for other in other_claims:
                shared_entities = my_entities & other["entities"]
                shared_sources = my_sources & other["sources"]

                # Threshold: >=2 shared entities, OR 1 entity + 1 source
                entity_count = len(shared_entities)
                source_count = len(shared_sources)

                if entity_count >= 2 or (entity_count >= 1 and source_count >= 1):
                    connections.append({
                        "other_claim": other["slug"],
                        "other_domain": other_domain,
                        "shared_entities": sorted(shared_entities)[:5],
                        "shared_sources": sorted(shared_sources)[:3],
                    })

        if connections:
            total_connections += len(connections)
            all_connections.append({
                "claim": claim["slug"],
                "domain": claim["domain"],
                "connections": connections[:10],
            })
            logger.info(
                "cross_domain: %s (%s) has %d cross-domain connections",
                claim["slug"], claim["domain"], len(connections),
            )

    # 5. Log to audit_log (best-effort: a failed write must not fail the merge hook)
    if all_connections and conn is not None:
        try:
            conn.execute(
                "INSERT INTO audit_log (stage, event, detail) VALUES (?, ?, ?)",
                ("cross_domain", "connections_found", json.dumps({
                    "pr": pr_num,
                    "total_connections": total_connections,
                    "claims_with_connections": len(all_connections),
                    "details": all_connections[:10],
                })),
            )
        except Exception:
            logger.exception("cross_domain: audit_log write failed (non-fatal)")

    if total_connections:
        logger.info(
            "cross_domain: PR #%d — %d connections across %d claims",
            pr_num, total_connections, len(all_connections),
        )

    return total_connections
|
||||
652
lib/db.py
652
lib/db.py
|
|
@ -9,7 +9,7 @@ from . import config
|
|||
|
||||
logger = logging.getLogger("pipeline.db")
|
||||
|
||||
SCHEMA_VERSION = 6
|
||||
SCHEMA_VERSION = 26
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
|
|
@ -35,6 +35,15 @@ CREATE TABLE IF NOT EXISTS sources (
|
|||
feedback TEXT,
|
||||
-- eval feedback for re-extraction (JSON)
|
||||
cost_usd REAL DEFAULT 0,
|
||||
-- v26: provenance — publisher (news org / venue) + content author.
|
||||
-- publisher_id references publishers(id) when source is from a known org.
|
||||
-- original_author_handle references contributors(handle) when author is in our system.
|
||||
-- original_author is free-text fallback ("Kim et al.", "Robin Hanson") — not credit-bearing.
|
||||
publisher_id INTEGER REFERENCES publishers(id),
|
||||
content_type TEXT,
|
||||
-- article | paper | tweet | conversation | self_authored | webpage | podcast
|
||||
original_author TEXT,
|
||||
original_author_handle TEXT REFERENCES contributors(handle),
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
|
|
@ -48,6 +57,7 @@ CREATE TABLE IF NOT EXISTS prs (
|
|||
-- conflict: rebase failed or merge timed out — needs human intervention
|
||||
domain TEXT,
|
||||
agent TEXT,
|
||||
commit_type TEXT CHECK(commit_type IS NULL OR commit_type IN ('extract', 'research', 'entity', 'decision', 'reweave', 'fix', 'challenge', 'enrich', 'synthesize', 'unknown')),
|
||||
tier TEXT,
|
||||
-- LIGHT, STANDARD, DEEP
|
||||
tier0_pass INTEGER,
|
||||
|
|
@ -68,6 +78,9 @@ CREATE TABLE IF NOT EXISTS prs (
|
|||
last_error TEXT,
|
||||
last_attempt TEXT,
|
||||
cost_usd REAL DEFAULT 0,
|
||||
auto_merge INTEGER DEFAULT 0,
|
||||
github_pr INTEGER,
|
||||
source_channel TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
merged_at TEXT
|
||||
);
|
||||
|
|
@ -103,11 +116,133 @@ CREATE TABLE IF NOT EXISTS audit_log (
|
|||
detail TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS response_audit (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
chat_id INTEGER,
|
||||
user TEXT,
|
||||
agent TEXT DEFAULT 'rio',
|
||||
model TEXT,
|
||||
query TEXT,
|
||||
conversation_window TEXT,
|
||||
-- JSON: prior N messages for context
|
||||
-- NOTE: intentional duplication of transcript data for audit self-containment.
|
||||
-- Transcripts live in /opt/teleo-eval/transcripts/ but audit rows need prompt
|
||||
-- context inline for retrieval-quality diagnosis. Primary driver of row size —
|
||||
-- target for cleanup when 90-day retention policy lands.
|
||||
entities_matched TEXT,
|
||||
-- JSON: [{name, path, score, used_in_response}]
|
||||
claims_matched TEXT,
|
||||
-- JSON: [{path, title, score, source, used_in_response}]
|
||||
retrieval_layers_hit TEXT,
|
||||
-- JSON: ["keyword","qdrant","graph"]
|
||||
retrieval_gap TEXT,
|
||||
-- What the KB was missing (if anything)
|
||||
market_data TEXT,
|
||||
-- JSON: injected token prices
|
||||
research_context TEXT,
|
||||
-- Haiku pre-pass results if any
|
||||
kb_context_text TEXT,
|
||||
-- Full context string sent to model
|
||||
tool_calls TEXT,
|
||||
-- JSON: ordered array [{tool, input, output, duration_ms, ts}]
|
||||
raw_response TEXT,
|
||||
display_response TEXT,
|
||||
confidence_score REAL,
|
||||
-- Model self-rated retrieval quality 0.0-1.0
|
||||
response_time_ms INTEGER,
|
||||
-- Eval pipeline columns (v10)
|
||||
prompt_tokens INTEGER,
|
||||
completion_tokens INTEGER,
|
||||
generation_cost REAL,
|
||||
embedding_cost REAL,
|
||||
total_cost REAL,
|
||||
blocked INTEGER DEFAULT 0,
|
||||
block_reason TEXT,
|
||||
query_type TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_sources_status ON sources(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_status ON prs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_domain ON prs(domain);
|
||||
CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_costs_date ON costs(date);
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_stage ON audit_log(stage);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_ts ON response_audit(timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_agent ON response_audit(agent);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_chat_ts ON response_audit(chat_id, timestamp);
|
||||
|
||||
-- Event-sourced contributions (schema v24).
|
||||
-- One row per credit-earning event. Idempotent via two partial UNIQUE indexes
|
||||
-- (SQLite treats NULL != NULL in UNIQUE constraints, so a single composite
|
||||
-- UNIQUE with nullable claim_path would allow evaluator-event duplicates).
|
||||
-- Leaderboards are SQL aggregations over this table; contributors becomes a materialized cache.
|
||||
CREATE TABLE IF NOT EXISTS contribution_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
handle TEXT NOT NULL,
|
||||
kind TEXT NOT NULL DEFAULT 'person',
|
||||
-- person | org | agent
|
||||
role TEXT NOT NULL,
|
||||
-- author | originator | challenger | synthesizer | evaluator
|
||||
weight REAL NOT NULL,
|
||||
pr_number INTEGER NOT NULL,
|
||||
claim_path TEXT,
|
||||
-- NULL for PR-level events (e.g. evaluator). Set for per-claim events.
|
||||
domain TEXT,
|
||||
channel TEXT,
|
||||
-- telegram | github | agent | web | unknown
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
-- Per-claim events: unique on (handle, role, pr_number, claim_path) when path IS NOT NULL.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_claim ON contribution_events(
|
||||
handle, role, pr_number, claim_path
|
||||
) WHERE claim_path IS NOT NULL;
|
||||
-- PR-level events (evaluator, author, trailer-based): unique on (handle, role, pr_number) when path IS NULL.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_pr ON contribution_events(
|
||||
handle, role, pr_number
|
||||
) WHERE claim_path IS NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_handle_ts ON contribution_events(handle, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_domain_ts ON contribution_events(domain, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_pr ON contribution_events(pr_number);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_role_ts ON contribution_events(role, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_kind_ts ON contribution_events(kind, timestamp);
|
||||
|
||||
-- Handle aliasing. @thesensatore → thesensatore. cameron → cameron-s1.
|
||||
-- Writers call resolve_alias(handle) before inserting events or upserting contributors.
|
||||
CREATE TABLE IF NOT EXISTS contributor_aliases (
|
||||
alias TEXT PRIMARY KEY,
|
||||
canonical TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_aliases_canonical ON contributor_aliases(canonical);
|
||||
|
||||
-- Publishers: news orgs, academic venues, social platforms. NOT contributors — these
|
||||
-- provide metadata/provenance for sources, never earn leaderboard credit. Separating
|
||||
-- these from contributors prevents CNBC/SpaceNews from dominating the leaderboard.
|
||||
-- (Apr 24 Cory directive: "only credit the original source if its on X or tg")
|
||||
CREATE TABLE IF NOT EXISTS publishers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
|
||||
url_pattern TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
|
||||
|
||||
-- Multi-platform identity: one contributor, many handles. Enables the leaderboard to
|
||||
-- unify @thesensatore (X) + thesensatore (TG) + thesensatore@github into one person.
|
||||
-- Writers check this table after resolving aliases to find canonical contributor handle.
|
||||
CREATE TABLE IF NOT EXISTS contributor_identities (
|
||||
contributor_handle TEXT NOT NULL,
|
||||
platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
|
||||
platform_handle TEXT NOT NULL,
|
||||
verified INTEGER DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
PRIMARY KEY (platform, platform_handle)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -140,6 +275,83 @@ def transaction(conn: sqlite3.Connection):
|
|||
raise
|
||||
|
||||
|
||||
# Branch prefix → (agent, commit_type) mapping.
# Single source of truth — used by merge.py at INSERT time and migration v7 backfill.
# Unknown prefixes → ('unknown', 'unknown') + warning log.
# Keep in sync with _CHANNEL_MAP below.
# NOTE(review): _CHANNEL_MAP has an "oberon" entry that is missing here, so
# oberon/* branches classify as ('unknown', 'unknown') — confirm intended.
# NOTE(review): the 'contrib' commit_type is not among the values listed in the
# prs.commit_type CHECK constraint in SCHEMA_SQL — confirm that constraint is
# widened elsewhere before rows with this value are inserted.
BRANCH_PREFIX_MAP = {
    "extract": ("pipeline", "extract"),
    "ingestion": ("pipeline", "extract"),
    "epimetheus": ("epimetheus", "extract"),
    "rio": ("rio", "research"),
    "theseus": ("theseus", "research"),
    "astra": ("astra", "research"),
    "vida": ("vida", "research"),
    "clay": ("clay", "research"),
    "leo": ("leo", "entity"),
    "reweave": ("pipeline", "reweave"),
    "fix": ("pipeline", "fix"),
    "contrib": ("external", "contrib"),
}
|
||||
|
||||
|
||||
def classify_branch(branch: str) -> tuple[str, str]:
    """Map a branch name to its ``(agent, commit_type)`` pair.

    The text before the first ``/`` (or the whole name when there is no
    slash) is the prefix. Prefixes starting with ``gh-pr-`` are fork PR
    branches and always yield ``('external', 'contrib')``; anything else is
    looked up in ``BRANCH_PREFIX_MAP``. An unrecognized prefix logs a warning
    and yields ``('unknown', 'unknown')``.
    """
    head = branch.partition("/")[0]

    # Fork PR branches: gh-pr-N/original-branch
    if head.startswith("gh-pr-"):
        return ("external", "contrib")

    mapped = BRANCH_PREFIX_MAP.get(head)
    if mapped is not None:
        return mapped

    logger.warning("Unknown branch prefix %r in branch %r — defaulting to ('unknown', 'unknown')", head, branch)
    return ("unknown", "unknown")
|
||||
|
||||
|
||||
# Keep in sync with BRANCH_PREFIX_MAP above.
#
# Valid source_channel values: github | telegram | agent | maintenance | web | unknown
# - github: external contributor PR (set via sync-mirror.sh github_pr linking,
#   or from gh-pr-* branches, or any time github_pr is provided)
# - telegram: message captured by telegram bot (must be tagged explicitly by
#   ingestion — extract/* default is "unknown" because the bare branch prefix
#   can no longer distinguish telegram-origin from github-origin extractions)
# - agent: per-agent research branches (rio/, theseus/, etc.)
# - maintenance: pipeline housekeeping (reweave/, epimetheus/, fix/)
# - web: future in-app submissions (chat UI or form posts)
# - unknown: fallback when provenance cannot be determined
#
# NOTE(review): "oberon" is mapped here but has no BRANCH_PREFIX_MAP entry, and
# "contrib" is in BRANCH_PREFIX_MAP but not here, so contrib/* resolves to
# "unknown" unless github_pr is passed — confirm this drift is intended.
_CHANNEL_MAP = {
    "extract": "unknown",
    "ingestion": "unknown",
    "rio": "agent",
    "theseus": "agent",
    "astra": "agent",
    "vida": "agent",
    "clay": "agent",
    "leo": "agent",
    "oberon": "agent",
    "reweave": "maintenance",
    "epimetheus": "maintenance",
    "fix": "maintenance",
}
|
||||
|
||||
|
||||
def classify_source_channel(branch: str, *, github_pr: int = None) -> str:
    """Work out the ``source_channel`` for a branch.

    Resolution order:

    1. ``github_pr`` is not ``None`` -> ``"github"``
    2. the branch name starts with ``gh-pr-`` -> ``"github"``
    3. the text before the first ``/`` is looked up in ``_CHANNEL_MAP``,
       falling back to ``"unknown"``.

    extract/* maps to "unknown", so callers that know the real origin
    (telegram bot, web submission handler) must override the value when the
    PR row is inserted.
    """
    if github_pr is not None:
        return "github"
    if branch.startswith("gh-pr-"):
        return "github"

    head = branch.partition("/")[0]
    return _CHANNEL_MAP.get(head, "unknown")
|
||||
|
||||
|
||||
def migrate(conn: sqlite3.Connection):
|
||||
"""Run schema migrations."""
|
||||
conn.executescript(SCHEMA_SQL)
|
||||
|
|
@ -251,11 +463,394 @@ def migrate(conn: sqlite3.Connection):
|
|||
""")
|
||||
logger.info("Migration v6: added metrics_snapshots table for analytics dashboard")
|
||||
|
||||
if current < 7:
|
||||
# Phase 7: agent attribution + commit_type for dashboard
|
||||
# commit_type column + backfill agent/commit_type from branch prefix
|
||||
try:
|
||||
conn.execute("ALTER TABLE prs ADD COLUMN commit_type TEXT CHECK(commit_type IS NULL OR commit_type IN ('extract', 'research', 'entity', 'decision', 'reweave', 'fix', 'unknown'))")
|
||||
except sqlite3.OperationalError:
|
||||
pass # column already exists from CREATE TABLE
|
||||
# Backfill agent and commit_type from branch prefix
|
||||
rows = conn.execute("SELECT number, branch FROM prs WHERE branch IS NOT NULL").fetchall()
|
||||
for row in rows:
|
||||
agent, commit_type = classify_branch(row["branch"])
|
||||
conn.execute(
|
||||
"UPDATE prs SET agent = ?, commit_type = ? WHERE number = ? AND (agent IS NULL OR commit_type IS NULL)",
|
||||
(agent, commit_type, row["number"]),
|
||||
)
|
||||
backfilled = len(rows)
|
||||
logger.info("Migration v7: added commit_type column, backfilled %d PRs with agent/commit_type", backfilled)
|
||||
|
||||
if current < 8:
|
||||
# Phase 8: response audit — full-chain visibility for agent response quality
|
||||
# Captures: query → tool calls → retrieval → context → response → confidence
|
||||
# Approved by Ganymede (architecture), Rio (agent needs), Rhea (ops)
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS response_audit (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
chat_id INTEGER,
|
||||
user TEXT,
|
||||
agent TEXT DEFAULT 'rio',
|
||||
model TEXT,
|
||||
query TEXT,
|
||||
conversation_window TEXT, -- intentional transcript duplication for audit self-containment
|
||||
entities_matched TEXT,
|
||||
claims_matched TEXT,
|
||||
retrieval_layers_hit TEXT,
|
||||
retrieval_gap TEXT,
|
||||
market_data TEXT,
|
||||
research_context TEXT,
|
||||
kb_context_text TEXT,
|
||||
tool_calls TEXT,
|
||||
raw_response TEXT,
|
||||
display_response TEXT,
|
||||
confidence_score REAL,
|
||||
response_time_ms INTEGER,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_ts ON response_audit(timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_agent ON response_audit(agent);
|
||||
CREATE INDEX IF NOT EXISTS idx_response_audit_chat_ts ON response_audit(chat_id, timestamp);
|
||||
""")
|
||||
logger.info("Migration v8: added response_audit table for agent response auditing")
|
||||
|
||||
if current < 9:
|
||||
# Phase 9: rebuild prs table to expand CHECK constraint on commit_type.
|
||||
# SQLite cannot ALTER CHECK constraints in-place — must rebuild table.
|
||||
# Old constraint (v7): extract,research,entity,decision,reweave,fix,unknown
|
||||
# New constraint: adds challenge,enrich,synthesize
|
||||
# Also re-derive commit_type from branch prefix for rows with invalid/NULL values.
|
||||
|
||||
# Step 1: Get all column names from existing table
|
||||
cols_info = conn.execute("PRAGMA table_info(prs)").fetchall()
|
||||
col_names = [c["name"] for c in cols_info]
|
||||
col_list = ", ".join(col_names)
|
||||
|
||||
# Step 2: Create new table with expanded CHECK constraint
|
||||
conn.executescript(f"""
|
||||
CREATE TABLE prs_new (
|
||||
number INTEGER PRIMARY KEY,
|
||||
source_path TEXT REFERENCES sources(path),
|
||||
branch TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'open',
|
||||
domain TEXT,
|
||||
agent TEXT,
|
||||
commit_type TEXT CHECK(commit_type IS NULL OR commit_type IN ('extract','research','entity','decision','reweave','fix','challenge','enrich','synthesize','unknown')),
|
||||
tier TEXT,
|
||||
tier0_pass INTEGER,
|
||||
leo_verdict TEXT DEFAULT 'pending',
|
||||
domain_verdict TEXT DEFAULT 'pending',
|
||||
domain_agent TEXT,
|
||||
domain_model TEXT,
|
||||
priority TEXT,
|
||||
origin TEXT DEFAULT 'pipeline',
|
||||
transient_retries INTEGER DEFAULT 0,
|
||||
substantive_retries INTEGER DEFAULT 0,
|
||||
last_error TEXT,
|
||||
last_attempt TEXT,
|
||||
cost_usd REAL DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
merged_at TEXT
|
||||
);
|
||||
INSERT INTO prs_new ({col_list}) SELECT {col_list} FROM prs;
|
||||
DROP TABLE prs;
|
||||
ALTER TABLE prs_new RENAME TO prs;
|
||||
""")
|
||||
logger.info("Migration v9: rebuilt prs table with expanded commit_type CHECK constraint")
|
||||
|
||||
# Step 3: Re-derive commit_type from branch prefix for invalid/NULL values
|
||||
rows = conn.execute(
|
||||
"""SELECT number, branch FROM prs
|
||||
WHERE branch IS NOT NULL
|
||||
AND (commit_type IS NULL
|
||||
OR commit_type NOT IN ('extract','research','entity','decision','reweave','fix','challenge','enrich','synthesize','unknown'))"""
|
||||
).fetchall()
|
||||
fixed = 0
|
||||
for row in rows:
|
||||
agent, commit_type = classify_branch(row["branch"])
|
||||
conn.execute(
|
||||
"UPDATE prs SET agent = COALESCE(agent, ?), commit_type = ? WHERE number = ?",
|
||||
(agent, commit_type, row["number"]),
|
||||
)
|
||||
fixed += 1
|
||||
conn.commit()
|
||||
logger.info("Migration v9: re-derived commit_type for %d PRs with invalid/NULL values", fixed)
|
||||
|
||||
if current < 10:
|
||||
# Add eval pipeline columns to response_audit
|
||||
# VPS may already be at v10/v11 from prior (incomplete) deploys — use IF NOT EXISTS pattern
|
||||
for col_def in [
|
||||
("prompt_tokens", "INTEGER"),
|
||||
("completion_tokens", "INTEGER"),
|
||||
("generation_cost", "REAL"),
|
||||
("embedding_cost", "REAL"),
|
||||
("total_cost", "REAL"),
|
||||
("blocked", "INTEGER DEFAULT 0"),
|
||||
("block_reason", "TEXT"),
|
||||
("query_type", "TEXT"),
|
||||
]:
|
||||
try:
|
||||
conn.execute(f"ALTER TABLE response_audit ADD COLUMN {col_def[0]} {col_def[1]}")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
conn.commit()
|
||||
logger.info("Migration v10: added eval pipeline columns to response_audit")
|
||||
|
||||
if current < 11:
|
||||
# Add auto_merge flag for agent PR auto-merge (eval-approved agent branches)
|
||||
try:
|
||||
conn.execute("ALTER TABLE prs ADD COLUMN auto_merge INTEGER DEFAULT 0")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists (VPS may be ahead of repo schema)
|
||||
conn.commit()
|
||||
logger.info("Migration v11: added auto_merge column to prs table")
|
||||
|
||||
|
||||
# v12-v16 ran manually on VPS before code was version-controlled.
|
||||
# Their changes are consolidated into v17+ migrations below.
|
||||
|
||||
if current < 17:
|
||||
# Add prompt/pipeline version tracking per PR
|
||||
for col, default in [
|
||||
("prompt_version", None),
|
||||
("pipeline_version", None),
|
||||
]:
|
||||
try:
|
||||
conn.execute(f"ALTER TABLE prs ADD COLUMN {col} TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
conn.commit()
|
||||
logger.info("Migration v17: added prompt_version, pipeline_version to prs table")
|
||||
|
||||
if current < 18:
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS review_records (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
pr_number INTEGER NOT NULL,
|
||||
claim_path TEXT,
|
||||
domain TEXT,
|
||||
agent TEXT,
|
||||
reviewer TEXT,
|
||||
reviewer_model TEXT,
|
||||
outcome TEXT NOT NULL,
|
||||
rejection_reason TEXT,
|
||||
disagreement_type TEXT,
|
||||
notes TEXT,
|
||||
batch_id TEXT,
|
||||
claims_in_batch INTEGER,
|
||||
reviewed_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_review_records_pr ON review_records(pr_number);
|
||||
CREATE INDEX IF NOT EXISTS idx_review_records_agent ON review_records(agent);
|
||||
""")
|
||||
conn.commit()
|
||||
logger.info("Migration v18: created review_records table")
|
||||
|
||||
if current < 19:
|
||||
# Add submitted_by for contributor attribution tracing.
|
||||
# Tracks who submitted the source: human handle, agent name, or "self-directed".
|
||||
try:
|
||||
conn.execute("ALTER TABLE prs ADD COLUMN submitted_by TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
try:
|
||||
conn.execute("ALTER TABLE sources ADD COLUMN submitted_by TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
conn.commit()
|
||||
logger.info("Migration v19: added submitted_by to prs and sources tables")
|
||||
|
||||
if current < 20:
|
||||
for col, default in [
|
||||
("conflict_rebase_attempts", "INTEGER DEFAULT 0"),
|
||||
("merge_failures", "INTEGER DEFAULT 0"),
|
||||
("merge_cycled", "INTEGER DEFAULT 0"),
|
||||
]:
|
||||
try:
|
||||
conn.execute(f"ALTER TABLE prs ADD COLUMN {col} {default}")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
conn.commit()
|
||||
logger.info("Migration v20: added conflict retry columns to prs")
|
||||
|
||||
if current < 21:
|
||||
try:
|
||||
conn.execute("ALTER TABLE prs ADD COLUMN github_pr INTEGER")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_prs_github_pr ON prs (github_pr) WHERE github_pr IS NOT NULL"
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v21: added github_pr column + index to prs")
|
||||
|
||||
if current < 22:
|
||||
try:
|
||||
conn.execute("ALTER TABLE prs ADD COLUMN source_channel TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
conn.execute("""
|
||||
UPDATE prs SET source_channel = CASE
|
||||
WHEN github_pr IS NOT NULL THEN 'github'
|
||||
WHEN branch LIKE 'gh-pr-%%' THEN 'github'
|
||||
WHEN branch LIKE 'theseus/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'rio/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'astra/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'clay/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'vida/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'oberon/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'leo/%%' THEN 'agent'
|
||||
WHEN branch LIKE 'reweave/%%' THEN 'maintenance'
|
||||
WHEN branch LIKE 'epimetheus/%%' THEN 'maintenance'
|
||||
WHEN branch LIKE 'fix/%%' THEN 'maintenance'
|
||||
WHEN branch LIKE 'extract/%%' THEN 'telegram'
|
||||
WHEN branch LIKE 'ingestion/%%' THEN 'telegram'
|
||||
ELSE 'unknown'
|
||||
END
|
||||
WHERE source_channel IS NULL
|
||||
""")
|
||||
conn.commit()
|
||||
logger.info("Migration v22: added source_channel to prs + backfilled from branch prefix")
|
||||
|
||||
if current < 23:
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_prs_source_path ON prs(source_path) WHERE source_path IS NOT NULL"
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v23: added idx_prs_source_path for auto-close dedup lookup")
|
||||
|
||||
if current < 24:
|
||||
# Event-sourced contributions table + alias table + kind column on contributors.
|
||||
# Non-breaking: contributors table stays; events are written in addition via
|
||||
# double-write in merge.py. Leaderboards switch to events in Phase B.
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS contribution_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
handle TEXT NOT NULL,
|
||||
kind TEXT NOT NULL DEFAULT 'person',
|
||||
role TEXT NOT NULL,
|
||||
weight REAL NOT NULL,
|
||||
pr_number INTEGER NOT NULL,
|
||||
claim_path TEXT,
|
||||
domain TEXT,
|
||||
channel TEXT,
|
||||
timestamp TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
-- Partial unique indexes handle SQLite's NULL != NULL UNIQUE semantics.
|
||||
-- Per-claim events dedup on 4-tuple; PR-level events dedup on 3-tuple.
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_claim ON contribution_events(
|
||||
handle, role, pr_number, claim_path
|
||||
) WHERE claim_path IS NOT NULL;
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_ce_unique_pr ON contribution_events(
|
||||
handle, role, pr_number
|
||||
) WHERE claim_path IS NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_handle_ts ON contribution_events(handle, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_domain_ts ON contribution_events(domain, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_pr ON contribution_events(pr_number);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_role_ts ON contribution_events(role, timestamp);
|
||||
CREATE INDEX IF NOT EXISTS idx_ce_kind_ts ON contribution_events(kind, timestamp);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS contributor_aliases (
|
||||
alias TEXT PRIMARY KEY,
|
||||
canonical TEXT NOT NULL,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_aliases_canonical ON contributor_aliases(canonical);
|
||||
""")
|
||||
try:
|
||||
conn.execute("ALTER TABLE contributors ADD COLUMN kind TEXT DEFAULT 'person'")
|
||||
except sqlite3.OperationalError:
|
||||
pass # column already exists
|
||||
# Seed known aliases. @thesensatore → thesensatore catches the zombie row Argus flagged.
|
||||
# cameron → cameron-s1 reconciles the Leo-flagged missing contributor.
|
||||
conn.executemany(
|
||||
"INSERT OR IGNORE INTO contributor_aliases (alias, canonical) VALUES (?, ?)",
|
||||
[
|
||||
("@thesensatore", "thesensatore"),
|
||||
("cameron", "cameron-s1"),
|
||||
],
|
||||
)
|
||||
# Seed kind='agent' for known Pentagon agents so the events writer picks it up.
|
||||
# Must stay in sync with lib/attribution.PENTAGON_AGENTS — drift causes
|
||||
# contributors.kind to disagree with classify_kind() output for future
|
||||
# inserts. (Ganymede review: "pipeline" was missing until Apr 24.)
|
||||
pentagon_agents = [
|
||||
"rio", "leo", "theseus", "vida", "clay", "astra",
|
||||
"oberon", "argus", "rhea", "ganymede", "epimetheus", "hermes", "ship",
|
||||
"pipeline",
|
||||
]
|
||||
for agent in pentagon_agents:
|
||||
conn.execute(
|
||||
"UPDATE contributors SET kind = 'agent' WHERE handle = ?",
|
||||
(agent,),
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v24: added contribution_events + contributor_aliases tables, kind column")
|
||||
|
||||
if current < 25:
|
||||
# v24 seeded 13 Pentagon agents but missed "pipeline" — classify_kind()
|
||||
# treats it as agent so contributors.kind drifted from event-insert output.
|
||||
# Idempotent corrective UPDATE: fresh installs have no "pipeline" row
|
||||
# (no-op), upgraded envs flip it if it exists. (Ganymede review Apr 24.)
|
||||
conn.execute(
|
||||
"UPDATE contributors SET kind = 'agent' WHERE handle = 'pipeline'"
|
||||
)
|
||||
conn.commit()
|
||||
logger.info("Migration v25: patched kind='agent' for pipeline handle")
|
||||
|
||||
if current < 26:
|
||||
# Add publishers + contributor_identities. Non-breaking — new tables only.
|
||||
# No existing data moved. Classification into publishers happens via a
|
||||
# separate script (scripts/reclassify-contributors.py) with Cory-reviewed
|
||||
# seed list. CHECK constraint on contributors.kind deferred to v27 after
|
||||
# classification completes. (Apr 24 Cory directive: "fix schema, don't
|
||||
# filter output" — separate contributors from publishers at the data layer.)
|
||||
conn.executescript("""
|
||||
CREATE TABLE IF NOT EXISTS publishers (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
kind TEXT CHECK(kind IN ('news', 'academic', 'social_platform', 'podcast', 'self', 'internal', 'legal', 'government', 'research_org', 'commercial', 'other')),
|
||||
url_pattern TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_name ON publishers(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_publishers_kind ON publishers(kind);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS contributor_identities (
|
||||
contributor_handle TEXT NOT NULL,
|
||||
platform TEXT NOT NULL CHECK(platform IN ('x', 'telegram', 'github', 'email', 'web', 'internal')),
|
||||
platform_handle TEXT NOT NULL,
|
||||
verified INTEGER DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
PRIMARY KEY (platform, platform_handle)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_identities_contributor ON contributor_identities(contributor_handle);
|
||||
""")
|
||||
# Extend sources with provenance columns. ALTER TABLE ADD COLUMN is
|
||||
# idempotent-safe via try/except because SQLite doesn't support IF NOT EXISTS
|
||||
# on column adds.
|
||||
for col_sql in (
|
||||
"ALTER TABLE sources ADD COLUMN publisher_id INTEGER REFERENCES publishers(id)",
|
||||
"ALTER TABLE sources ADD COLUMN content_type TEXT",
|
||||
"ALTER TABLE sources ADD COLUMN original_author TEXT",
|
||||
"ALTER TABLE sources ADD COLUMN original_author_handle TEXT REFERENCES contributors(handle)",
|
||||
):
|
||||
try:
|
||||
conn.execute(col_sql)
|
||||
except sqlite3.OperationalError as e:
|
||||
if "duplicate column" not in str(e).lower():
|
||||
raise
|
||||
conn.commit()
|
||||
logger.info("Migration v26: added publishers + contributor_identities tables + sources provenance columns")
|
||||
|
||||
if current < SCHEMA_VERSION:
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO schema_version (version) VALUES (?)",
|
||||
(SCHEMA_VERSION,),
|
||||
)
|
||||
conn.commit() # Explicit commit — executescript auto-commits DDL but not subsequent DML
|
||||
logger.info("Database migrated to schema version %d", SCHEMA_VERSION)
|
||||
else:
|
||||
logger.debug("Database at schema version %d", current)
|
||||
|
|
@ -269,6 +864,36 @@ def audit(conn: sqlite3.Connection, stage: str, event: str, detail: str = None):
|
|||
)
|
||||
|
||||
|
||||
def record_review(
    conn: sqlite3.Connection,
    pr_number: int,
    outcome: str,
    *,
    domain: str = None,
    agent: str = None,
    reviewer: str = None,
    reviewer_model: str = None,
    rejection_reason: str = None,
    disagreement_type: str = None,
    notes: str = None,
    claims_in_batch: int = None,
):
    """Insert one row into ``review_records`` for an eval verdict.

    ``notes`` is stored truncated to 4000 characters; an empty or missing
    value is stored as NULL. ``batch_id`` is always the PR number rendered
    as text. The statement is executed on ``conn`` without committing.
    """
    stored_notes = notes[:4000] if notes else None
    batch_id = str(pr_number)  # batch_id = PR number
    values = (
        pr_number,
        domain,
        agent,
        reviewer,
        reviewer_model,
        outcome,
        rejection_reason,
        disagreement_type,
        stored_notes,
        batch_id,
        claims_in_batch,
    )
    conn.execute(
        """INSERT INTO review_records
           (pr_number, domain, agent, reviewer, reviewer_model, outcome,
            rejection_reason, disagreement_type, notes, batch_id, claims_in_batch)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        values,
    )
|
||||
|
||||
|
||||
def append_priority_log(conn: sqlite3.Connection, path: str, stage: str, priority: str, reasoning: str):
|
||||
"""Append a priority assessment to a source's priority_log.
|
||||
|
||||
|
|
@ -296,6 +921,31 @@ def append_priority_log(conn: sqlite3.Connection, path: str, stage: str, priorit
|
|||
raise
|
||||
|
||||
|
||||
def insert_response_audit(conn: sqlite3.Connection, **kwargs):
    """Insert one ``response_audit`` row from keyword arguments.

    Only keywords naming a known column and carrying a non-None value are
    written; everything else is ignored. If nothing remains, no row is
    inserted. Column names in the SQL come exclusively from the fixed
    allow-list below, never from caller-supplied text. The statement is
    executed on ``conn`` without committing.
    """
    allowed = (
        "timestamp", "chat_id", "user", "agent", "model", "query",
        "conversation_window", "entities_matched", "claims_matched",
        "retrieval_layers_hit", "retrieval_gap", "market_data",
        "research_context", "kb_context_text", "tool_calls",
        "raw_response", "display_response", "confidence_score",
        "response_time_ms",
        # Eval pipeline columns (v10)
        "prompt_tokens", "completion_tokens", "generation_cost",
        "embedding_cost", "total_cost", "blocked", "block_reason",
        "query_type",
    )

    columns = []
    values = []
    for name, value in kwargs.items():
        if value is None or name not in allowed:
            continue
        columns.append(name)
        values.append(value)

    if not columns:
        return

    col_names = ", ".join(columns)
    placeholders = ", ".join("?" * len(columns))
    conn.execute(
        f"INSERT INTO response_audit ({col_names}) VALUES ({placeholders})",
        tuple(values),
    )
|
||||
|
||||
|
||||
def set_priority(conn: sqlite3.Connection, path: str, priority: str, reason: str = "human override"):
|
||||
"""Set a source's authoritative priority. Used for human overrides and initial triage."""
|
||||
conn.execute(
|
||||
|
|
|
|||
113
lib/dedup.py
Normal file
113
lib/dedup.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""Evidence block deduplication for enrichment idempotency.
|
||||
|
||||
Removes duplicate '### Additional Evidence' and '### Auto-enrichment' blocks
|
||||
that arise from rebase of enrichment branches. (Leo: PRs #1751, #1752)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
logger = logging.getLogger("pipeline.dedup")

# Matches start of an evidence block header
_EVIDENCE_HEADER = re.compile(
    r'^### (?:Additional Evidence|Auto-enrichment) \(',
    re.MULTILINE,
)

# Extracts source key from the *Source: ...* line
_SOURCE_LINE = re.compile(r'^\*Source: (.+)\*', re.MULTILINE)


def _trailing_block_length(rest: str) -> int:
    """Return how many characters of ``rest`` belong to its leading evidence block.

    ``rest`` starts at an evidence header and runs to the end of the file.
    Once a ``*Source:`` line has been seen, the block ends at the first
    later line that opens a new section (``---``, ``## ``, ``### ``,
    ``Relevant Notes`` or ``Topics``). Without a ``*Source:`` line, or
    without any such marker, the block is taken to run to the end of ``rest``.
    """
    offset = 0
    seen_source = False
    for idx, line in enumerate(rest.split("\n")):
        if idx > 0:  # the header line itself is never a boundary
            if line.startswith("*Source:"):
                seen_source = True
            elif seen_source and (
                line.startswith(("---", "## ", "### "))
                or re.match(r'^(?:Relevant Notes|Topics)\s*:?', line)
            ):
                return offset
        offset += len(line) + 1  # +1 for the newline removed by split()
    return len(rest)


def dedup_evidence_blocks(content: str) -> str:
    """Drop repeated evidence blocks from a claim file's text.

    Evidence blocks are the sections introduced by a
    ``### Additional Evidence (`` or ``### Auto-enrichment (`` header. Each
    block is keyed by the text of its ``*Source: ...*`` line; the first
    block for a key is kept and later blocks with the same key are removed.
    A block with no source line gets a unique key and is therefore always
    kept. Content with fewer than two evidence headers is returned unchanged.
    """
    starts = [match.start() for match in _EVIDENCE_HEADER.finditer(content)]
    if len(starts) < 2:
        return content

    # Work out (start, end, source_key) for every block. A block runs up to
    # the next evidence header; only the final block needs a boundary scan
    # so trailing non-evidence sections are not swallowed into it.
    spans = []
    final_index = len(starts) - 1
    for idx, begin in enumerate(starts):
        if idx < final_index:
            finish = starts[idx + 1]
        else:
            finish = begin + _trailing_block_length(content[begin:])

        found = _SOURCE_LINE.search(content[begin:finish])
        key = found.group(1).strip() if found else f"_unknown_{idx}"
        spans.append((begin, finish, key))

    # Reassemble: text before the first block, each first-seen block, then
    # whatever follows the last block.
    kept = [content[:spans[0][0]]]
    seen_sources: set[str] = set()
    dropped = 0
    for begin, finish, key in spans:
        if key in seen_sources:
            dropped += 1
        else:
            seen_sources.add(key)
            kept.append(content[begin:finish])
    kept.append(content[spans[-1][1]:])

    if dropped > 0:
        logger.info("Deduped %d duplicate evidence block(s)", dropped)

    return "".join(kept)
|
||||
208
lib/digest.py
Normal file
208
lib/digest.py
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
"""Daily digest — sends Cory a summary of all Tier 3 activity at 8am London time.
|
||||
|
||||
Aggregates: merged claims (with insight summaries), pipeline metrics, agent activity,
|
||||
pending review items. Runs as a scheduled job in bot.py.
|
||||
|
||||
Epimetheus owns this module.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
logger = logging.getLogger("telegram.digest")
|
||||
|
||||
LONDON_TZ = ZoneInfo("Europe/London")
DIGEST_HOUR_LONDON = 8  # 8am London time (auto-adjusts for BST/GMT)


def next_digest_time() -> datetime:
    """Return the next occurrence of the digest hour in London, as UTC.

    The slot is today's ``DIGEST_HOUR_LONDON``:00 London wall-clock time if
    that is still in the future, otherwise the same wall-clock time
    tomorrow. The BST/GMT offset is resolved by zoneinfo when converting
    the result to UTC.
    """
    london_now = datetime.now(LONDON_TZ)
    todays_slot = london_now.replace(
        hour=DIGEST_HOUR_LONDON, minute=0, second=0, microsecond=0
    )
    if todays_slot > london_now:
        upcoming = todays_slot
    else:
        upcoming = todays_slot + timedelta(days=1)
    return upcoming.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _get_merged_claims_24h(conn: sqlite3.Connection) -> list[dict]:
|
||||
"""Get PRs merged in the last 24 hours with domain and branch info."""
|
||||
rows = conn.execute(
|
||||
"""SELECT number, branch, domain, agent, commit_type, merged_at, description
|
||||
FROM prs
|
||||
WHERE merged_at > datetime('now', '-24 hours')
|
||||
AND status = 'merged'
|
||||
ORDER BY merged_at DESC""",
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
def _get_pipeline_metrics_24h(conn: sqlite3.Connection) -> dict:
|
||||
"""Get pipeline activity metrics for the last 24 hours."""
|
||||
total_merged = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE merged_at > datetime('now', '-24 hours') AND status = 'merged'"
|
||||
).fetchone()[0]
|
||||
|
||||
total_closed = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE status = 'closed' AND created_at > datetime('now', '-24 hours')"
|
||||
).fetchone()[0]
|
||||
|
||||
total_conflict = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE status IN ('conflict', 'conflict_permanent') AND created_at > datetime('now', '-24 hours')"
|
||||
).fetchone()[0]
|
||||
|
||||
total_open = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing', 'approved', 'merging')"
|
||||
).fetchone()[0]
|
||||
|
||||
# Approval rate (last 24h)
|
||||
evaluated = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE leo_verdict IN ('approve', 'request_changes') AND created_at > datetime('now', '-24 hours')"
|
||||
).fetchone()[0]
|
||||
approved = conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE leo_verdict = 'approve' AND created_at > datetime('now', '-24 hours')"
|
||||
).fetchone()[0]
|
||||
approval_rate = (approved / evaluated * 100) if evaluated > 0 else 0
|
||||
|
||||
return {
|
||||
"merged": total_merged,
|
||||
"closed": total_closed,
|
||||
"conflict": total_conflict,
|
||||
"open": total_open,
|
||||
"evaluated": evaluated,
|
||||
"approved": approved,
|
||||
"approval_rate": approval_rate,
|
||||
}
|
||||
|
||||
|
||||
def _get_agent_activity_24h(conn: sqlite3.Connection) -> dict[str, int]:
|
||||
"""Get PR count by agent for the last 24 hours."""
|
||||
rows = conn.execute(
|
||||
"""SELECT agent, COUNT(*) as cnt
|
||||
FROM prs
|
||||
WHERE created_at > datetime('now', '-24 hours')
|
||||
AND agent IS NOT NULL
|
||||
GROUP BY agent
|
||||
ORDER BY cnt DESC""",
|
||||
).fetchall()
|
||||
return {r["agent"]: r["cnt"] for r in rows}
|
||||
|
||||
|
||||
def _get_pending_review_count(conn: sqlite3.Connection) -> int:
|
||||
"""Count PRs awaiting review."""
|
||||
return conn.execute(
|
||||
"SELECT COUNT(*) FROM prs WHERE status IN ('open', 'reviewing')"
|
||||
).fetchone()[0]
|
||||
|
||||
|
||||
def _extract_claim_title(branch: str) -> str:
|
||||
"""Extract a human-readable claim title from a branch name.
|
||||
|
||||
Branch format: extract/source-slug or agent/description
|
||||
"""
|
||||
# Strip prefix (extract/, research/, theseus/, etc.)
|
||||
parts = branch.split("/", 1)
|
||||
slug = parts[1] if len(parts) > 1 else parts[0]
|
||||
# Convert slug to readable title
|
||||
return slug.replace("-", " ").replace("_", " ").title()
|
||||
|
||||
|
||||
|
||||
def format_digest(
|
||||
merged_claims: list[dict],
|
||||
metrics: dict,
|
||||
agent_activity: dict[str, int],
|
||||
pending_review: int,
|
||||
) -> str:
|
||||
"""Format the daily digest message."""
|
||||
now = datetime.now(timezone.utc)
|
||||
date_str = now.strftime("%Y-%m-%d")
|
||||
|
||||
parts = [f"DAILY DIGEST — {date_str}", ""]
|
||||
|
||||
# Merged claims section
|
||||
if merged_claims:
|
||||
# Group by domain
|
||||
by_domain: dict[str, list] = {}
|
||||
for claim in merged_claims:
|
||||
domain = claim.get("domain") or "unknown"
|
||||
by_domain.setdefault(domain, []).append(claim)
|
||||
|
||||
parts.append(f"CLAIMS MERGED ({len(merged_claims)})")
|
||||
for domain, claims in sorted(by_domain.items()):
|
||||
for c in claims:
|
||||
# Use real description from frontmatter if available, fall back to slug title
|
||||
desc = c.get("description")
|
||||
if desc:
|
||||
# Take first description if multiple (pipe-delimited)
|
||||
display = desc.split(" | ")[0]
|
||||
if len(display) > 120:
|
||||
display = display[:117] + "..."
|
||||
else:
|
||||
display = _extract_claim_title(c.get("branch", "unknown"))
|
||||
commit_type = c.get("commit_type", "")
|
||||
type_tag = f"[{commit_type}] " if commit_type else ""
|
||||
parts.append(f" {type_tag}{display} ({domain})")
|
||||
parts.append("")
|
||||
else:
|
||||
parts.extend(["CLAIMS MERGED (0)", " No claims merged in the last 24h", ""])
|
||||
|
||||
# Pipeline metrics
|
||||
success_rate = 0
|
||||
total_attempted = metrics["merged"] + metrics["closed"] + metrics["conflict"]
|
||||
if total_attempted > 0:
|
||||
success_rate = metrics["merged"] / total_attempted * 100
|
||||
|
||||
parts.append("PIPELINE")
|
||||
parts.append(f" Merged: {metrics['merged']} | Closed: {metrics['closed']} | Conflicts: {metrics['conflict']}")
|
||||
parts.append(f" Success rate: {success_rate:.0f}% | Approval rate: {metrics['approval_rate']:.0f}%")
|
||||
parts.append(f" Open PRs: {metrics['open']}")
|
||||
parts.append("")
|
||||
|
||||
# Agent activity
|
||||
if agent_activity:
|
||||
parts.append("AGENTS")
|
||||
for agent, count in agent_activity.items():
|
||||
parts.append(f" {agent}: {count} PRs")
|
||||
parts.append("")
|
||||
else:
|
||||
parts.extend(["AGENTS", " No agent activity in the last 24h", ""])
|
||||
|
||||
# Pending review
|
||||
if pending_review > 0:
|
||||
parts.append(f"PENDING YOUR REVIEW: {pending_review}")
|
||||
else:
|
||||
parts.append("PENDING YOUR REVIEW: 0")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
async def send_daily_digest(context):
|
||||
"""Send daily digest to admin chat. Scheduled job."""
|
||||
conn = context.bot_data.get("approval_conn")
|
||||
admin_chat_id = context.bot_data.get("admin_chat_id")
|
||||
|
||||
if not conn or not admin_chat_id:
|
||||
logger.debug("Digest skipped — no DB connection or admin chat ID")
|
||||
return
|
||||
|
||||
try:
|
||||
merged = _get_merged_claims_24h(conn)
|
||||
metrics = _get_pipeline_metrics_24h(conn)
|
||||
activity = _get_agent_activity_24h(conn)
|
||||
pending = _get_pending_review_count(conn)
|
||||
|
||||
text = format_digest(merged, metrics, activity, pending)
|
||||
|
||||
await context.bot.send_message(
|
||||
chat_id=admin_chat_id,
|
||||
text=text,
|
||||
)
|
||||
logger.info("Daily digest sent (%d claims, %d agents active)",
|
||||
len(merged), len(activity))
|
||||
except Exception as e:
|
||||
logger.error("Failed to send daily digest: %s", e)
|
||||
|
|
@ -37,6 +37,11 @@ _AGENT_PRIMARY_DOMAIN: dict[str, str] = {
|
|||
"leo": "grand-strategy",
|
||||
}
|
||||
|
||||
_INGESTION_SOURCE_DOMAIN: dict[str, str] = {
|
||||
"futardio": "internet-finance",
|
||||
"metadao": "internet-finance",
|
||||
}
|
||||
|
||||
|
||||
def agent_for_domain(domain: str | None) -> str:
|
||||
"""Get the reviewing agent for a domain. Falls back to Leo."""
|
||||
|
|
@ -82,6 +87,14 @@ def detect_domain_from_branch(branch: str) -> str | None:
|
|||
"""Extract domain from branch name like 'rio/claims-futarchy' → 'internet-finance'.
|
||||
|
||||
Uses agent prefix → primary domain mapping for pipeline branches.
|
||||
For ingestion branches, checks the rest of the name for source-type hints.
|
||||
"""
|
||||
prefix = branch.split("/")[0].lower() if "/" in branch else ""
|
||||
return _AGENT_PRIMARY_DOMAIN.get(prefix)
|
||||
if prefix in _AGENT_PRIMARY_DOMAIN:
|
||||
return _AGENT_PRIMARY_DOMAIN[prefix]
|
||||
if prefix == "ingestion":
|
||||
rest = branch.split("/", 1)[1].lower() if "/" in branch else ""
|
||||
for source_key, domain in _INGESTION_SOURCE_DOMAIN.items():
|
||||
if source_key in rest:
|
||||
return domain
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -107,6 +107,10 @@ def _apply_claim_enrichment(claim_path: str, evidence: str, pr_number: int,
|
|||
if not content:
|
||||
return False, f"target claim empty: {claim_path}"
|
||||
|
||||
# Dedup: skip if this PR already enriched this claim (idempotency)
|
||||
if f"PR #{pr_number}" in content:
|
||||
return False, f"already enriched by PR #{pr_number}"
|
||||
|
||||
enrichment_block = (
|
||||
f"\n\n### Auto-enrichment (near-duplicate conversion, similarity={similarity:.2f})\n"
|
||||
f"*Source: PR #{pr_number} — \"{original_title}\"*\n"
|
||||
|
|
|
|||
260
lib/eval_actions.py
Normal file
260
lib/eval_actions.py
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
"""PR disposition actions — async Forgejo + DB operations for end-of-eval decisions.
|
||||
|
||||
Extracted from evaluate.py to isolate the "do something to this PR" functions
|
||||
from orchestration logic. Contains:
|
||||
|
||||
- post_formal_approvals: submit Forgejo reviews from 2 agents (not PR author)
|
||||
- terminate_pr: close PR, post rejection comment, requeue source
|
||||
- dispose_rejected_pr: disposition logic for rejected PRs on attempt 2+
|
||||
|
||||
All functions are async (Forgejo API calls). Dependencies: forgejo, db, config,
|
||||
pr_state, feedback, eval_parse.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
from . import config, db
|
||||
from .eval_parse import classify_issues
|
||||
from .feedback import format_rejection_comment
|
||||
from .forgejo import api as forgejo_api, get_agent_token, get_pr_diff, repo_path
|
||||
from .github_feedback import on_closed, on_eval_complete
|
||||
from .pr_state import close_pr
|
||||
|
||||
logger = logging.getLogger("pipeline.eval_actions")
|
||||
|
||||
|
||||
async def post_formal_approvals(pr_number: int, pr_author: str):
    """Post up to two formal "APPROVED" Forgejo reviews on a PR.

    Walks a fixed, ordered list of agent names, skipping ``pr_author``.
    For each remaining agent that has a token, a review is POSTed to
    ``pulls/{pr_number}/reviews``; a non-``None`` API result counts as one
    approval. Stops once two approvals have been counted. Agents without a
    token, or whose request returns ``None``, are silently passed over.
    """
    reviewers = ("leo", "vida", "theseus", "clay", "astra", "rio")
    posted = 0

    for reviewer in reviewers:
        if reviewer == pr_author:
            continue
        if posted >= 2:
            break

        token = get_agent_token(reviewer)
        if not token:
            continue

        response = await forgejo_api(
            "POST",
            repo_path(f"pulls/{pr_number}/reviews"),
            {"body": "Approved.", "event": "APPROVED"},
            token=token,
        )
        if response is None:
            continue

        posted += 1
        logger.debug("Formal approval for PR #%d by %s (%d/2)", pr_number, reviewer, posted)
|
||||
|
||||
|
||||
async def terminate_pr(conn, pr_number: int, reason: str):
    """Terminal state: comment on and close the PR, then requeue its source.

    Steps, in order:
      1. Load ``eval_issues`` for the PR and decode it as JSON (undecodable
         or missing values are treated as "no issues").
      2. Post a closing comment on the PR; when issues exist the structured
         rejection feedback is appended to it.
      3. Close the PR via ``close_pr``. If that reports failure, log a
         warning and stop — the source is not requeued.
      4. Notify ``on_closed`` (failures there are logged and ignored).
      5. Set the linked source's status to ``needs_reextraction`` and write
         a ``pr_terminated`` audit entry.
    """
    pr_row = conn.execute("SELECT eval_issues, agent FROM prs WHERE number = ?", (pr_number,)).fetchone()

    # Issue tags drive the structured feedback section of the comment.
    issues = []
    if pr_row and pr_row["eval_issues"]:
        try:
            issues = json.loads(pr_row["eval_issues"])
        except (json.JSONDecodeError, TypeError):
            pass

    # Both comment variants share the same opening; only the feedback
    # section (quality gate guidance) is conditional.
    feedback_body = format_rejection_comment(issues, source="eval_terminal") if issues else None
    comment_body = (
        f"**Closed by eval pipeline** — {reason}.\n\n"
        f"Evaluated {config.MAX_EVAL_ATTEMPTS} times without passing. "
        f"Source will be re-queued with feedback."
    )
    if feedback_body is not None:
        comment_body = f"{comment_body}\n\n{feedback_body}"

    await forgejo_api(
        "POST",
        repo_path(f"issues/{pr_number}/comments"),
        {"body": comment_body},
    )

    if not await close_pr(conn, pr_number, last_error=reason):
        logger.warning("PR #%d: Forgejo close failed — skipping source requeue, will retry next cycle", pr_number)
        return

    try:
        await on_closed(conn, pr_number, reason=reason)
    except Exception:
        logger.exception("PR #%d: GitHub close feedback failed (non-fatal)", pr_number)

    # Tag source for re-extraction with feedback
    requeue = conn.execute(
        """UPDATE sources SET status = 'needs_reextraction',
                updated_at = datetime('now')
           WHERE path = (SELECT source_path FROM prs WHERE number = ?)""",
        (pr_number,),
    )
    if requeue.rowcount == 0:
        logger.warning("PR #%d: no source_path linked — source not requeued for re-extraction", pr_number)

    audit_payload = json.dumps({"pr": pr_number, "reason": reason})
    db.audit(conn, "evaluate", "pr_terminated", audit_payload)
    logger.info("PR #%d: TERMINATED — %s", pr_number, reason)
|
||||
|
||||
|
||||
async def dispose_rejected_pr(conn, pr_number: int, eval_attempts: int, all_issues: list[str]):
    """Disposition logic for a rejected PR, for any attempt number.

    Auto-close gate (all attempts): near-duplicate of an already-merged PR for
    the same source — close immediately. Avoids the Apr 22 runaway-damage
    pattern where a source extracted 20+ times in a short window produced
    dozens of open PRs that all had to be closed manually.

    Attempt 1: post structured feedback (if any issues), leave the PR open.
    Attempt 2..MAX-1: check issue classification.
      - Mechanical only: keep open for one more attempt (auto-fix future).
      - Anything else (substantive, mixed, unknown): close PR, requeue source.
    Attempt >= config.MAX_EVAL_ATTEMPTS: terminal.

    Args:
        conn: DB connection used for the sibling lookup and audit entries.
        pr_number: Forgejo PR number being disposed.
        eval_attempts: Number of eval attempts made so far for this PR.
        all_issues: Issue tags collected from the rejecting reviews.
    """
    # Auto-close near-duplicate when a merged sibling for the same source exists.
    # Runs before the attempt-count branches so it catches the common runaway
    # case on attempt 1 instead of waiting for attempt 2's terminate path.
    #
    # Exact-match requirement (Ganymede review): compound rejections like
    # ["near_duplicate", "factual_discrepancy"] carry signal about the merged
    # sibling being wrong or limited — we want humans to see those. Only the
    # pure single-issue case is safe to auto-close.
    if all_issues == ["near_duplicate"]:
        existing_merged = conn.execute(
            """SELECT p2.number, p1.source_path FROM prs p1
               JOIN prs p2 ON p2.source_path = p1.source_path
               WHERE p1.number = ?
                 AND p1.source_path IS NOT NULL
                 AND p2.number != p1.number
                 AND p2.status = 'merged'
               LIMIT 1""",
            (pr_number,),
        ).fetchone()
        if existing_merged:
            sibling = existing_merged[0]
            source_path = existing_merged[1]

            # Enrichment guard: LLM reviewers can flag enrichment prose as
            # "redundant" via eval_parse regex, tagging near_duplicate even
            # though validate.py's structural check only fires on NEW files.
            # If the PR only MODIFIES existing files (no "new file mode" in
            # diff), it's an enrichment — skip auto-close so a human reviews.
            #
            # 10s timeout bounds damage when Forgejo is wedged (Apr 22 incident:
            # hung for 2.5h). Conservative fallback: skip auto-close on any
            # failure — fall through to normal rejection path.
            try:
                diff = await asyncio.wait_for(get_pr_diff(pr_number), timeout=10)
            except Exception:
                # asyncio.TimeoutError is an Exception subclass, so this one
                # clause covers both the timeout and any fetch error.
                logger.warning(
                    "PR #%d: diff fetch failed/timed out for near-dup guard — skipping auto-close",
                    pr_number, exc_info=True,
                )
                diff = None

            if not diff:
                # None or empty — conservative fallback, fall through to attempt-count branches
                pass
            elif "new file mode" not in diff:
                logger.info(
                    "PR #%d: near_duplicate but modifies-only (enrichment) — skipping auto-close",
                    pr_number,
                )
            else:
                logger.info(
                    "PR #%d: auto-closing near-duplicate of merged PR #%d (same source)",
                    pr_number, sibling,
                )
                # Post a brief explanation before closing (best-effort — non-fatal)
                try:
                    await forgejo_api(
                        "POST",
                        repo_path(f"issues/{pr_number}/comments"),
                        {"body": (
                            f"Auto-closed: near-duplicate of already-merged PR "
                            f"#{sibling} (same source: `{source_path}`)."
                        )},
                    )
                except Exception:
                    logger.debug("PR #%d: auto-close comment failed (non-fatal)", pr_number, exc_info=True)

                closed = await close_pr(
                    conn, pr_number,
                    last_error=f"auto_closed_near_duplicate: merged sibling #{sibling}",
                )
                if not closed:
                    # Same handling as terminate_pr: a failed close must not be
                    # recorded in the audit log as a completed auto-close.
                    logger.warning(
                        "PR #%d: Forgejo close failed during near-duplicate auto-close — not audited",
                        pr_number,
                    )
                    return

                db.audit(
                    conn, "evaluate", "auto_closed_near_duplicate",
                    json.dumps({
                        "pr": pr_number,
                        "merged_sibling": sibling,
                        "source_path": source_path,
                        "eval_attempts": eval_attempts,
                    }),
                )
                return

    if eval_attempts < 2:
        # Attempt 1: post structured feedback so agent learns, but don't close
        if all_issues:
            feedback_body = format_rejection_comment(all_issues, source="eval_attempt_1")
            await forgejo_api(
                "POST",
                repo_path(f"issues/{pr_number}/comments"),
                {"body": feedback_body},
            )
        return

    classification = classify_issues(all_issues)

    if eval_attempts >= config.MAX_EVAL_ATTEMPTS:
        # Terminal
        await terminate_pr(conn, pr_number, f"eval budget exhausted after {eval_attempts} attempts")
        return

    if classification == "mechanical":
        # Mechanical issues only — keep open for one more attempt.
        # Future: auto-fix module will push fixes here.
        logger.info(
            "PR #%d: attempt %d, mechanical issues only (%s) — keeping open for fix attempt",
            pr_number,
            eval_attempts,
            all_issues,
        )
        db.audit(
            conn,
            "evaluate",
            "mechanical_retry",
            json.dumps(
                {
                    "pr": pr_number,
                    "attempt": eval_attempts,
                    "issues": all_issues,
                }
            ),
        )
    else:
        # Substantive, mixed, or unknown — close and requeue
        logger.info(
            "PR #%d: attempt %d, %s issues (%s) — closing and requeuing source",
            pr_number,
            eval_attempts,
            classification,
            all_issues,
        )
        await terminate_pr(
            conn, pr_number, f"substantive issues after {eval_attempts} attempts: {', '.join(all_issues)}"
        )
|
||||
434
lib/eval_parse.py
Normal file
434
lib/eval_parse.py
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
"""Pure parsing functions for the eval stage — zero I/O, zero async.
|
||||
|
||||
Extracted from evaluate.py to isolate testable parsing logic from
|
||||
orchestration, DB, and Forgejo API calls.
|
||||
|
||||
Contents:
|
||||
- Diff helpers: filter, classify, tier routing
|
||||
- Verdict/issue parsing: structured tags + prose inference
|
||||
- Batch response parsing: fan-out validation
|
||||
|
||||
All functions are pure (input → output). The only external dependency
|
||||
is config.MECHANICAL_ISSUE_TAGS / config.SUBSTANTIVE_ISSUE_TAGS for
|
||||
classify_issues.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from . import config
|
||||
|
||||
logger = logging.getLogger("pipeline.eval_parse")
|
||||
|
||||
|
||||
# ─── Diff helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def filter_diff(diff: str) -> tuple[str, str]:
    """Split a unified diff into review-relevant and entity-only parts.

    The diff is cut into per-file sections at each ``diff --git`` header.

    Returns:
        ``(review_diff, entity_diff)`` — both are the concatenation of the
        matching sections in their original order.

    Routing per section:
      - dropped: inbox/archive, inbox/queue, inbox/null-result, schemas/,
        skills/, agents/<name>/musings/
      - entity_diff: entities/<domain>/ where <domain> is not one of the
        core domains listed below
      - review_diff: everything else (including core-domain entities)
    """
    skipped = re.compile(r"^diff --git a/(inbox/(archive|queue|null-result)|schemas|skills|agents/[^/]+/musings)/")
    entity_header = re.compile(r"^diff --git a/entities/([^/]+)/")
    core_domains = {"living-agents", "living-capital", "teleohumanity", "mechanisms"}

    review_parts: list[str] = []
    entity_parts: list[str] = []

    for chunk in re.split(r"(?=^diff --git )", diff, flags=re.MULTILINE):
        if not chunk.strip() or skipped.match(chunk):
            continue
        found = entity_header.match(chunk)
        is_plain_entity = bool(found) and found.group(1) not in core_domains
        (entity_parts if is_plain_entity else review_parts).append(chunk)

    return "".join(review_parts), "".join(entity_parts)
|
||||
|
||||
|
||||
def extract_changed_files(diff: str) -> str:
    """Return the changed file paths of a diff, one per line.

    Each ``diff --git`` header contributes the text between ``a/`` and the
    first `` b/`` on that line.
    """
    paths = []
    for header in diff.split("\n"):
        if not header.startswith("diff --git"):
            continue
        without_prefix = header.replace("diff --git a/", "")
        paths.append(without_prefix.split(" b/")[0])
    return "\n".join(paths)
|
||||
|
||||
|
||||
def is_musings_only(diff: str) -> bool:
    """Return True when the diff touches at least one file and every
    ``diff --git`` header refers to an ``agents/.../musings/`` path."""
    headers = [line for line in diff.split("\n") if line.startswith("diff --git")]
    if not headers:
        return False
    return all("agents/" in header and "/musings/" in header for header in headers)
|
||||
|
||||
|
||||
def diff_contains_claim_type(diff: str) -> bool:
    """Claim-shape detector: does any added line declare ``type: claim``?

    Mechanical check ($0). An added line (``+`` prefix, excluding ``+++``
    file headers) whose stripped content is ``type: claim`` — bare, single-
    or double-quoted — marks the change as a factual claim rather than an
    entity update or formatting fix. Such PRs must be classified STANDARD
    minimum regardless of Haiku triage; this catches factual claims
    disguised as LIGHT content.
    (Theseus: converts semantic problem to mechanical check)
    """
    claim_markers = ("type: claim", 'type: "claim"', "type: 'claim'")
    return any(
        added[1:].strip() in claim_markers
        for added in diff.split("\n")
        if added.startswith("+") and not added.startswith("+++")
    )
|
||||
|
||||
|
||||
def deterministic_tier(diff: str) -> str | None:
|
||||
"""Deterministic tier routing — skip Haiku triage for obvious cases.
|
||||
|
||||
Checks diff file patterns before calling the LLM. Returns tier string
|
||||
if deterministic, None if Haiku triage is needed.
|
||||
|
||||
Rules (Leo-calibrated):
|
||||
- All files in entities/ only → LIGHT
|
||||
- All files in inbox/ only (queue, archive, null-result) → LIGHT
|
||||
- Any file in core/ or foundations/ → DEEP (structural KB changes)
|
||||
- Has challenged_by field → DEEP (challenges existing claims)
|
||||
- Modifies existing file (not new) in domains/ → DEEP (enrichment/change)
|
||||
- Otherwise → None (needs Haiku triage)
|
||||
|
||||
NOTE: Cross-domain wiki links are NOT a DEEP signal — most claims link
|
||||
across domains, that's the whole point of the knowledge graph (Leo).
|
||||
"""
|
||||
changed_files = []
|
||||
for line in diff.split("\n"):
|
||||
if line.startswith("diff --git a/"):
|
||||
path = line.replace("diff --git a/", "").split(" b/")[0]
|
||||
changed_files.append(path)
|
||||
|
||||
if not changed_files:
|
||||
return None
|
||||
|
||||
# All entities/ only → LIGHT
|
||||
if all(f.startswith("entities/") for f in changed_files):
|
||||
logger.info("Deterministic tier: LIGHT (all files in entities/)")
|
||||
return "LIGHT"
|
||||
|
||||
# All inbox/ only (queue, archive, null-result) → LIGHT
|
||||
if all(f.startswith("inbox/") for f in changed_files):
|
||||
logger.info("Deterministic tier: LIGHT (all files in inbox/)")
|
||||
return "LIGHT"
|
||||
|
||||
# Any file in core/ or foundations/ → DEEP (structural KB changes)
|
||||
if any(f.startswith("core/") or f.startswith("foundations/") for f in changed_files):
|
||||
logger.info("Deterministic tier: DEEP (touches core/ or foundations/)")
|
||||
return "DEEP"
|
||||
|
||||
# Check diff content for DEEP signals
|
||||
has_challenged_by = False
|
||||
new_files: set[str] = set()
|
||||
|
||||
lines = diff.split("\n")
|
||||
for i, line in enumerate(lines):
|
||||
# Detect new files
|
||||
if line.startswith("--- /dev/null") and i + 1 < len(lines) and lines[i + 1].startswith("+++ b/"):
|
||||
new_files.add(lines[i + 1][6:])
|
||||
# Check for challenged_by field
|
||||
if line.startswith("+") and not line.startswith("+++"):
|
||||
stripped = line[1:].strip()
|
||||
if stripped.startswith("challenged_by:"):
|
||||
has_challenged_by = True
|
||||
|
||||
if has_challenged_by:
|
||||
logger.info("Deterministic tier: DEEP (has challenged_by field)")
|
||||
return "DEEP"
|
||||
|
||||
# NOTE: Modified existing domain claims are NOT auto-DEEP — enrichments
|
||||
# (appending evidence) are common and should be STANDARD. Let Haiku triage
|
||||
# distinguish enrichments from structural changes.
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ─── Verdict parsing ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def parse_verdict(review_text: str, reviewer: str) -> str:
    """Read the reviewer's VERDICT tag out of a review.

    Looks for ``VERDICT:<REVIEWER>:APPROVE`` first, then
    ``VERDICT:<REVIEWER>:REQUEST_CHANGES`` (reviewer name upper-cased).
    Returns ``'approve'`` or ``'request_changes'``; a review with neither
    tag is logged as a warning and treated as ``'request_changes'``.
    """
    tag_prefix = f"VERDICT:{reviewer.upper()}:"

    if tag_prefix + "APPROVE" in review_text:
        return "approve"

    if tag_prefix + "REQUEST_CHANGES" not in review_text:
        logger.warning("No parseable verdict from %s — treating as request_changes", reviewer)
    return "request_changes"
|
||||
|
||||
|
||||
# Map model-invented tags to valid tags. Models consistently ignore the valid
|
||||
# tag list and invent their own. This normalizes them. (Ganymede, Mar 14)
|
||||
_TAG_ALIASES: dict[str, str] = {
|
||||
"schema_violation": "frontmatter_schema",
|
||||
"missing_schema_fields": "frontmatter_schema",
|
||||
"missing_schema": "frontmatter_schema",
|
||||
"schema": "frontmatter_schema",
|
||||
"missing_frontmatter": "frontmatter_schema",
|
||||
"redundancy": "near_duplicate",
|
||||
"duplicate": "near_duplicate",
|
||||
"missing_confidence": "confidence_miscalibration",
|
||||
"confidence_error": "confidence_miscalibration",
|
||||
"vague_claims": "scope_error",
|
||||
"unfalsifiable": "scope_error",
|
||||
"unverified_wiki_links": "broken_wiki_links",
|
||||
"unverified-wiki-links": "broken_wiki_links",
|
||||
"missing_wiki_links": "broken_wiki_links",
|
||||
"invalid_wiki_links": "broken_wiki_links",
|
||||
"wiki_link_errors": "broken_wiki_links",
|
||||
"overclaiming": "title_overclaims",
|
||||
"title_overclaim": "title_overclaims",
|
||||
"date_error": "date_errors",
|
||||
"factual_error": "factual_discrepancy",
|
||||
"factual_inaccuracy": "factual_discrepancy",
|
||||
}
|
||||
|
||||
VALID_ISSUE_TAGS = {"broken_wiki_links", "frontmatter_schema", "title_overclaims",
|
||||
"confidence_miscalibration", "date_errors", "factual_discrepancy",
|
||||
"near_duplicate", "scope_error"}
|
||||
|
||||
|
||||
def normalize_tag(tag: str) -> str | None:
|
||||
"""Normalize a model-generated tag to a valid tag, or None if unrecognizable."""
|
||||
tag = tag.strip().lower().replace("-", "_")
|
||||
if tag in VALID_ISSUE_TAGS:
|
||||
return tag
|
||||
if tag in _TAG_ALIASES:
|
||||
return _TAG_ALIASES[tag]
|
||||
# Fuzzy: check if any valid tag is a substring or vice versa
|
||||
for valid in VALID_ISSUE_TAGS:
|
||||
if valid in tag or tag in valid:
|
||||
return valid
|
||||
return None
|
||||
|
||||
|
||||
# ─── Issue parsing ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
# Keyword patterns for inferring issue tags from unstructured review prose.
# Conservative: only match unambiguous indicators. Order doesn't matter.
_PROSE_TAG_PATTERNS: dict[str, list[re.Pattern]] = {
    "frontmatter_schema": [
        re.compile(r"frontmatter", re.IGNORECASE),
        re.compile(r"missing.{0,20}(type|domain|confidence|source|created)\b", re.IGNORECASE),
        re.compile(r"yaml.{0,10}(invalid|missing|error|schema)", re.IGNORECASE),
        re.compile(r"required field", re.IGNORECASE),
        re.compile(r"lacks?.{0,15}(required|yaml|schema|fields)", re.IGNORECASE),
        re.compile(r"missing.{0,15}(schema|fields|frontmatter)", re.IGNORECASE),
        re.compile(r"schema.{0,10}(compliance|violation|missing|invalid)", re.IGNORECASE),
    ],
    "broken_wiki_links": [
        re.compile(r"(broken|dead|invalid).{0,10}(wiki.?)?link", re.IGNORECASE),
        re.compile(r"wiki.?link.{0,20}(not found|missing|broken|invalid|resolv|unverif)", re.IGNORECASE),
        re.compile(r"\[\[.{1,80}\]\].{0,20}(not found|doesn.t exist|missing)", re.IGNORECASE),
        re.compile(r"unverified.{0,10}(wiki|link)", re.IGNORECASE),
    ],
    "factual_discrepancy": [
        re.compile(r"factual.{0,10}(error|inaccura|discrepanc|incorrect)", re.IGNORECASE),
        re.compile(r"misrepresent", re.IGNORECASE),
    ],
    "confidence_miscalibration": [
        re.compile(r"confidence.{0,20}(too high|too low|miscalibrat|overstat|should be)", re.IGNORECASE),
        re.compile(r"(overstat|understat).{0,20}confidence", re.IGNORECASE),
    ],
    "scope_error": [
        re.compile(r"scope.{0,10}(error|too broad|overscop|unscoped)", re.IGNORECASE),
        re.compile(r"unscoped.{0,10}(universal|claim)", re.IGNORECASE),
        re.compile(r"(vague|unfalsifiable).{0,15}(claim|assertion)", re.IGNORECASE),
        re.compile(r"not.{0,10}(specific|falsifiable|disagreeable).{0,10}enough", re.IGNORECASE),
    ],
    "title_overclaims": [
        re.compile(r"title.{0,20}(overclaim|overstat|too broad)", re.IGNORECASE),
        re.compile(r"overclaim", re.IGNORECASE),
    ],
    "near_duplicate": [
        re.compile(r"near.?duplicate", re.IGNORECASE),
        re.compile(r"(very|too) similar.{0,20}(claim|title|existing)", re.IGNORECASE),
        re.compile(r"duplicate.{0,20}(of|claim|title|existing|information)", re.IGNORECASE),
        re.compile(r"redundan", re.IGNORECASE),
    ],
}


def parse_issues(review_text: str) -> list[str]:
    """Extract issue tags from review.

    First tries the structured ``<!-- ISSUES: tag1, tag2 -->`` comment: each
    comma-separated tag is passed through ``normalize_tag``; recognized tags
    are returned in first-seen order without duplicates, unrecognized ones
    are dropped with a debug log.

    Falls back to keyword inference from prose when there is no structured
    comment or none of its tags could be normalized.
    """
    match = re.search(r"<!-- ISSUES: ([^>]+) -->", review_text)
    if match:
        raw_tags = [tag.strip() for tag in match.group(1).split(",") if tag.strip()]
        normalized = []
        for tag in raw_tags:
            norm = normalize_tag(tag)
            if not norm:
                logger.debug("Unrecognized issue tag '%s' — dropped", tag)
            elif norm not in normalized:
                # A repeated (already collected) tag is valid, just redundant:
                # skip it quietly instead of reporting it as unrecognized.
                normalized.append(norm)
        if normalized:
            return normalized
    # Fallback: infer tags from review prose
    return infer_issues_from_prose(review_text)


def infer_issues_from_prose(review_text: str) -> list[str]:
    """Infer issue tags from unstructured review text via keyword matching.

    Fallback for reviews that reject without structured <!-- ISSUES: --> tags.
    Conservative: requires at least one unambiguous keyword match per tag.
    Tags are returned in the key order of ``_PROSE_TAG_PATTERNS``.
    """
    return [
        tag
        for tag, patterns in _PROSE_TAG_PATTERNS.items()
        if any(p.search(review_text) for p in patterns)
    ]
|
||||
|
||||
|
||||
def classify_issues(issues: list[str]) -> str:
    """Classify issue tags as 'mechanical', 'substantive', 'mixed' or 'unknown'.

    Tags are compared against ``config.MECHANICAL_ISSUE_TAGS`` and
    ``config.SUBSTANTIVE_ISSUE_TAGS``. ``'unknown'`` is returned for an
    empty list and when no tag belongs to either set.
    """
    if not issues:
        return "unknown"

    tags = set(issues)
    has_mechanical = bool(tags & config.MECHANICAL_ISSUE_TAGS)
    has_substantive = bool(tags & config.SUBSTANTIVE_ISSUE_TAGS)

    if has_mechanical and has_substantive:
        return "mixed"
    if has_substantive:
        return "substantive"
    if has_mechanical:
        return "mechanical"
    return "unknown"  # tags not in either set
|
||||
|
||||
|
||||
# ─── Batch response parsing ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def parse_batch_response(response: str, pr_numbers: list[int], agent: str) -> dict[int, str]:
    """Parse batched domain review into per-PR review sections.

    Sections are delimited by verdict markers of the form
    ``<!-- PR:NNN VERDICT:AGENT:APPROVE|REQUEST_CHANGES -->``; each marker
    terminates its PR's section. A section starts at the last
    ``=== PR #NNN`` header found between the previous marker and this one,
    or directly after the previous marker when no such header exists.

    Returns {pr_number: review_text} for each PR found in the response; each
    review_text is prefixed with a batch label listing ``pr_numbers`` for
    traceability. Missing PRs are omitted — caller handles fallback. If a PR
    number has several markers, the last one wins.
    """
    verdict_marker = re.compile(
        r"<!-- PR:(\d+) VERDICT:" + re.escape(agent.upper()) + r":(APPROVE|REQUEST_CHANGES) -->"
    )
    # Loop-invariant label, built once.
    batch_label = ", ".join(f"#{n}" for n in pr_numbers)

    result: dict[int, str] = {}
    prev_end = 0

    for match in verdict_marker.finditer(response):
        pr_num = int(match.group(1))

        # Look for this PR's header only inside its own section (after the
        # previous marker), and require that the number is not followed by
        # another digit so "=== PR #12" cannot match "=== PR #123".
        header = re.compile(r"=== PR #" + str(pr_num) + r"(?!\d)")
        section_start = prev_end
        for found in header.finditer(response, prev_end, match.start()):
            section_start = found.start()  # keep the last occurrence

        section_text = response[section_start:match.end()].strip()
        result[pr_num] = (
            f"*(batch review with PRs {batch_label})*\n\n"
            f"{section_text}\n"
        )
        prev_end = match.end()

    return result
|
||||
|
||||
|
||||
def validate_batch_fanout(
    parsed: dict[int, str],
    pr_diffs: list[dict],
    agent: str,
) -> tuple[dict[int, str], list[int]]:
    """Check a parsed batch review for completeness and cross-contamination.

    Args:
        parsed: per-PR review text keyed by PR number.
        pr_diffs: dicts with at least ``"number"`` and ``"diff"`` keys.
        agent: not used by the checks in this function.

    Returns:
        ``(valid_reviews, fallback_pr_numbers)``
        - valid_reviews: reviews that passed validation
        - fallback_pr_numbers: PRs that need individual review — either
          absent from ``parsed``, or whose review mentions none of its own
          files but does mention a file from another PR in the batch.

    A review that mentions no known file at all is accepted as generic
    (short diffs often get reviews without filenames).
    """

    def _mentions_any(text: str, candidates: set[str]) -> bool:
        # Only path strings of 10+ chars count, to avoid false substring
        # hits on short names.
        return any(path in text for path in candidates if len(path) >= 10)

    # pr_number → full paths plus their last two segments (for models that
    # abbreviate, e.g. "internet-finance/dao.md"). Bare filenames are not
    # used: names like "dao.md" or "space.md" match too easily (Leo note #3).
    pr_files: dict[int, set[str]] = {}
    for entry in pr_diffs:
        paths = set()
        for raw in entry["diff"].split("\n"):
            if not raw.startswith("diff --git a/"):
                continue
            full_path = raw.replace("diff --git a/", "").split(" b/")[0]
            paths.add(full_path)
            segments = full_path.split("/")
            if len(segments) >= 2:
                paths.add("/".join(segments[-2:]))
        pr_files[entry["number"]] = paths

    valid: dict[int, str] = {}
    fallback: list[int] = []

    for entry in pr_diffs:
        number = entry["number"]

        # Completeness: every PR in the batch needs its own review.
        if number not in parsed:
            logger.warning("Batch fan-out: PR #%d missing from response — fallback to individual", number)
            fallback.append(number)
            continue

        review = parsed[number]
        own_files = pr_files.get(number, set())

        # Cross-contamination: silent about its own files, yet naming
        # files that belong to a different PR of the batch.
        if own_files and not _mentions_any(review, own_files):
            foreign_files = set()
            for other in pr_diffs:
                if other["number"] != number:
                    foreign_files.update(pr_files.get(other["number"], set()))

            if _mentions_any(review, foreign_files):
                logger.warning(
                    "Batch fan-out: PR #%d review references files from another PR — cross-contamination, fallback",
                    number,
                )
                fallback.append(number)
                continue

        valid[number] = review

    return valid, fallback
|
||||
900
lib/evaluate.py
900
lib/evaluate.py
File diff suppressed because it is too large
Load diff
1075
lib/extract.py
Normal file
1075
lib/extract.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -6,7 +6,7 @@ The extraction prompt focuses on WHAT to extract:
|
|||
- Identify entity data
|
||||
- Check for duplicates against KB index
|
||||
|
||||
Mechanical enforcement (frontmatter format, wiki links, dates, filenames)
|
||||
Mechanical enforcement (frontmatter format, dates, filenames)
|
||||
is handled by post_extract.py AFTER the LLM returns.
|
||||
|
||||
Design principle (Leo): mechanical rules in code, judgment in prompts.
|
||||
|
|
@ -27,6 +27,9 @@ def build_extraction_prompt(
|
|||
rationale: str | None = None,
|
||||
intake_tier: str | None = None,
|
||||
proposed_by: str | None = None,
|
||||
prior_art: list[dict] | None = None,
|
||||
previous_feedback: dict | None = None,
|
||||
source_format: str | None = None,
|
||||
) -> str:
|
||||
"""Build the lean extraction prompt.
|
||||
|
||||
|
|
@ -40,6 +43,10 @@ def build_extraction_prompt(
|
|||
rationale: Contributor's natural-language thesis about the source (optional)
|
||||
intake_tier: undirected | directed | challenge (optional)
|
||||
proposed_by: Contributor handle who submitted the source (optional)
|
||||
prior_art: Qdrant search results — existing claims semantically similar to this source.
|
||||
Each dict has: claim_title, claim_path, description, score.
|
||||
Injected as connection candidates for extract-time linking.
|
||||
source_format: Source format hint (e.g. "conversation" for Telegram chats).
|
||||
|
||||
Returns:
|
||||
The complete prompt string
|
||||
|
|
@ -72,6 +79,153 @@ Set `contributor_thesis_extractable: true` if you extracted the contributor's th
|
|||
else:
|
||||
contributor_directive = ""
|
||||
|
||||
# Build previous feedback section (for re-extraction after eval rejection)
|
||||
if previous_feedback:
|
||||
issues = previous_feedback.get("issues", [])
|
||||
leo_verdict = previous_feedback.get("leo", "")
|
||||
domain_verdict = previous_feedback.get("domain", "")
|
||||
feedback_lines = [
|
||||
"\n## Previous Extraction Feedback\n",
|
||||
"A previous extraction from this source was **rejected** by the evaluation pipeline.",
|
||||
"Learn from these issues and avoid repeating them:\n",
|
||||
]
|
||||
if issues:
|
||||
for issue in issues:
|
||||
issue_guidance = {
|
||||
"frontmatter_schema": "Fix frontmatter format — ensure all required fields are present and correctly typed.",
|
||||
"title_overclaims": "Make titles more precise — avoid broad generalizations. The title must be specific enough to disagree with.",
|
||||
"confidence_miscalibration": "Calibrate confidence honestly — single source = experimental at most. Don't mark speculative claims as likely.",
|
||||
"factual_discrepancy": "Check facts carefully — verify dates, numbers, and attributions against the source text.",
|
||||
"near_duplicate": "Check the KB index more carefully — this claim may already exist. Prefer enrichment over duplication.",
|
||||
"scope_error": "Scope claims correctly — don't mix structural, functional, and causal claims in one.",
|
||||
"broken_wiki_links": "Do NOT use [[wiki links]] in body text. Use the connections and related_claims JSON fields instead.",
|
||||
}
|
||||
guidance = issue_guidance.get(issue, f"Address: {issue}")
|
||||
feedback_lines.append(f"- **{issue}**: {guidance}")
|
||||
feedback_lines.append("")
|
||||
if leo_verdict == "request_changes":
|
||||
feedback_lines.append("The lead reviewer requested changes. Extract fewer, higher-quality claims.")
|
||||
if domain_verdict == "request_changes":
|
||||
feedback_lines.append("The domain reviewer requested changes. Pay closer attention to domain-specific standards.")
|
||||
feedback_lines.append("")
|
||||
previous_feedback_section = "\n".join(feedback_lines)
|
||||
else:
|
||||
previous_feedback_section = ""
|
||||
|
||||
# Build connection candidates section (if prior art found via Qdrant)
|
||||
if prior_art:
|
||||
pa_lines = [
|
||||
"\n## Connection Candidates (semantically similar existing claims)\n",
|
||||
"These existing claims are topically related to this source. For each NEW claim you extract,",
|
||||
"check this list and specify connections in the `connections` array.\n",
|
||||
]
|
||||
high_sim = []
|
||||
for i, pa in enumerate(prior_art[:10], 1):
|
||||
title = pa.get("claim_title", "untitled")
|
||||
path = pa.get("claim_path", "")
|
||||
desc = pa.get("description", "")
|
||||
score = pa.get("score", 0)
|
||||
filename = path.rsplit("/", 1)[-1].replace(".md", "") if path else title
|
||||
pa_lines.append(f"{i}. **{title}** (`{filename}`, similarity: {score:.2f})")
|
||||
if desc:
|
||||
pa_lines.append(f" {desc}")
|
||||
if score >= 0.75:
|
||||
high_sim.append(title)
|
||||
pa_lines.append("")
|
||||
if high_sim:
|
||||
pa_lines.append("**WARNING — HIGH SIMILARITY MATCHES (score >= 0.75):**")
|
||||
pa_lines.append("The following existing claims are very similar to themes in this source.")
|
||||
pa_lines.append("Do NOT extract new claims that restate these — use ENRICHMENT instead:")
|
||||
for hs in high_sim:
|
||||
pa_lines.append(f" - {hs}")
|
||||
pa_lines.append("")
|
||||
connection_candidates = "\n".join(pa_lines)
|
||||
else:
|
||||
connection_candidates = ""
|
||||
|
||||
# Build conversation extraction section (for Telegram/chat sources)
|
||||
if source_format and source_format.lower() == "conversation":
|
||||
conversation_section = """
|
||||
## Conversation Source — Special Extraction Rules
|
||||
|
||||
This source is a **conversation between a human domain expert and an AI agent**.
|
||||
The extraction rules are DIFFERENT from article sources:
|
||||
|
||||
### Who said what matters
|
||||
|
||||
- **The human (@m3taversal / contributor)** is the domain expert. Their statements carry
|
||||
authority — especially corrections, pushback, and factual assertions.
|
||||
- **The AI agent's responses** are secondary. They are useful for context (what was being
|
||||
discussed) and for confirming when the human's correction landed (look for "you're right",
|
||||
"fair point", confidence drops).
|
||||
|
||||
### Corrections are the HIGHEST-VALUE content
|
||||
|
||||
When the human says "that's wrong", "not true", "you're wrong", "out of date", or similar:
|
||||
|
||||
1. **Extract the correction as a claim or enrichment.** The human is correcting the KB's
|
||||
understanding. This is precisely what the KB needs.
|
||||
2. **The correction itself IS the claim.** "Curated launches had significantly more committed
|
||||
capital than permissionless launches" is a testable, disagreeable proposition — extract it
|
||||
AS A CLAIM, not just an enrichment. If the correction states something specific enough to
|
||||
disagree with, it's a claim. Extract it even if it's only one sentence.
|
||||
3. **Short corrections are HIGH value, not low value.** A 15-word correction that fixes a
|
||||
factual error is worth more than a 500-word article that confirms what we already know.
|
||||
NEVER null-result a conversation just because the human's message is short.
|
||||
4. **Map corrections to existing claims.** Search the KB index for claims that the correction
|
||||
challenges. Output BOTH a new claim (the corrected understanding) AND an enrichment
|
||||
(type: "challenge") targeting the existing claim. The enrichment links the correction
|
||||
to what it corrects; the claim captures the corrected knowledge as a standalone proposition.
|
||||
|
||||
### Bot LEARNING lines are extraction hints
|
||||
|
||||
When the AI agent includes a `LEARNING:` line, it's a pre-extracted correction. Use it as
|
||||
a starting point — but reformulate it as a proper claim (the LEARNING line is often too
|
||||
casual or too specific to the conversation context).
|
||||
|
||||
### Bot CONFIDENCE drops are signals
|
||||
|
||||
When the AI agent drops its confidence score after a correction, that CONFIRMS the human
|
||||
was right. Low confidence (0.3-0.5) after pushback = strong signal the correction is valid.
|
||||
|
||||
### Trust hierarchy for numbers and specifics
|
||||
|
||||
**CRITICAL:** Neither the human NOR the AI agent should be treated as authoritative sources
|
||||
for specific numbers, dates, dollar amounts, or statistics UNLESS they cite a verifiable
|
||||
external source (on-chain data, official announcements, published reports).
|
||||
|
||||
- **Bot-generated numbers are ALWAYS unverified.** When the AI agent says "$25.6M committed
|
||||
capital" or "15x oversubscription" — these are the bot's best guess, NOT verified data.
|
||||
NEVER extract bot-generated numbers as evidence in a claim.
|
||||
- **Human-asserted numbers are ALSO unverified** unless they cite a source. "It raised $11.4M"
|
||||
from the human is a claim about a number, not proof of the number.
|
||||
- **Extract the DIRECTIONAL insight, not the specific figures.** "Curated launches attracted
|
||||
significantly more committed capital than permissionless launches" is extractable.
|
||||
"$25.6M vs $11.4M" is not — unless the conversation cites where those numbers come from.
|
||||
- **If specific figures are important to the claim, flag them.** Add a note in the claim body:
|
||||
"Note: specific figures cited in conversation require verification against on-chain data."
|
||||
|
||||
The goal: capture WHAT the human is asserting (the mechanism, the direction, the pattern)
|
||||
without laundering unverified numbers into the knowledge base as if they were evidence.
|
||||
|
||||
### Anti-circularity rule
|
||||
|
||||
If the AI agent is simply reflecting the human's thesis back (restating what the human said
|
||||
in different words), do NOT extract that as a claim sourced from the agent. That's circular.
|
||||
Only extract claims that either:
|
||||
- Represent the human's ORIGINAL assertion (source it to the human)
|
||||
- Introduce genuinely NEW information from the agent's knowledge (source it to the agent + context)
|
||||
|
||||
### Retrieval-only conversations → null_result
|
||||
|
||||
If the conversation is purely a lookup request ("what is X", "give me a list of Y",
|
||||
"what's the market cap of Z") with no analytical content, corrections, or novel claims,
|
||||
return an empty extraction (null_result). The dividing line: did the human ASSERT something
|
||||
or only ASK something?
|
||||
"""
|
||||
else:
|
||||
conversation_section = ""
|
||||
|
||||
return f"""You are {agent}, extracting knowledge from a source for TeleoHumanity's collective knowledge base.
|
||||
|
||||
## Your Task
|
||||
|
|
@ -136,14 +290,16 @@ Single source = experimental at most. Pitch rhetoric or marketing copy = specula
|
|||
**File:** {source_file}
|
||||
|
||||
{source_content}
|
||||
{contributor_directive}
|
||||
## KB Index (existing claims — check for duplicates and enrichment targets)
|
||||
{conversation_section}{contributor_directive}{previous_feedback_section}{connection_candidates}
|
||||
## KB Index (existing claims and entities — check for duplicates, enrichment targets, and connections)
|
||||
|
||||
{kb_index}
|
||||
|
||||
## Output Format
|
||||
|
||||
Return valid JSON. The post-processor handles frontmatter formatting, wiki links, and dates — focus on the intellectual content.
|
||||
Return valid JSON. The post-processor handles frontmatter formatting and dates — focus on the intellectual content.
|
||||
|
||||
**Do NOT use [[wiki links]] in body text.** Express all cross-references through the `connections` and `related_claims` JSON fields instead. Inline [[links]] are stripped by the post-processor — use the structured JSON fields which capture relationship type and reason.
|
||||
|
||||
```json
|
||||
{{
|
||||
|
|
@ -157,6 +313,13 @@ Return valid JSON. The post-processor handles frontmatter formatting, wiki links
|
|||
"source": "author/org, key evidence reference",
|
||||
"body": "Argument with evidence. Cite specific data, quotes, studies from the source. Explain WHY the claim is supported. This must be a real argument, not a restatement of the title.",
|
||||
"related_claims": ["existing-claim-stem-from-kb-index"],
|
||||
"connections": [
|
||||
{{
|
||||
"target": "existing-claim-filename-from-connection-candidates-or-kb-index",
|
||||
"relationship": "supports|challenges|related",
|
||||
"reason": "One sentence: WHY does this claim support/challenge/relate to the target?"
|
||||
}}
|
||||
],
|
||||
"scope": "structural|functional|causal|correlational",
|
||||
"sourcer": "handle or name of the original author/source (e.g., @theiaresearch, Pine Analytics)"
|
||||
}}
|
||||
|
|
@ -206,8 +369,9 @@ Return valid JSON. The post-processor handles frontmatter formatting, wiki links
|
|||
3. **Facts are not claims.** Individual data points go in `facts`. Only generalized patterns from multiple data points become claims.
|
||||
4. **Proposals are entities, not claims.** A governance proposal, token launch, or funding event is structured data (entity). Only extract a claim if the event reveals a novel mechanism insight that generalizes beyond this specific case.
|
||||
5. **Scope your claims.** Say whether you're claiming a structural, functional, causal, or correlational relationship.
|
||||
6. **OPSEC.** Never extract specific dollar amounts, valuations, equity percentages, or deal terms for LivingIP/Teleo. General market data is fine.
|
||||
7. **Read the Agent Notes.** If the source has "Agent Notes" or "Curator Notes" sections, they contain context about why this source matters.
|
||||
6. **Connect your claims.** For every new claim, check the Connection Candidates list. If a candidate is related, add it to the `connections` array with the relationship type and a one-sentence reason. Use `supports` when your claim provides evidence for the target, `challenges` when it contradicts, `related` only as a last resort. Unconnected claims are orphans — connect them at birth.
|
||||
7. **OPSEC.** Never extract specific dollar amounts, valuations, equity percentages, or deal terms for LivingIP/Teleo. General market data is fine.
|
||||
8. **Read the Agent Notes.** If the source has "Agent Notes" or "Curator Notes" sections, they contain context about why this source matters.
|
||||
|
||||
Return valid JSON only. No markdown fencing, no explanation outside the JSON.
|
||||
"""
|
||||
|
|
|
|||
52
lib/fixer.py
52
lib/fixer.py
|
|
@ -22,6 +22,7 @@ import logging
|
|||
from pathlib import Path
|
||||
|
||||
from . import config, db
|
||||
from .pr_state import close_pr, reset_for_reeval, start_fixing
|
||||
from .validate import WIKI_LINK_RE, load_existing_claims
|
||||
|
||||
logger = logging.getLogger("pipeline.fixer")
|
||||
|
|
@ -62,19 +63,9 @@ async def _fix_wiki_links_in_pr(conn, pr_number: int) -> dict:
|
|||
between new claims in the same PR are preserved.
|
||||
"""
|
||||
# Atomic claim — prevent concurrent fixers and evaluators
|
||||
cursor = conn.execute(
|
||||
"UPDATE prs SET status = 'fixing', last_attempt = datetime('now') WHERE number = ? AND status = 'open'",
|
||||
(pr_number,),
|
||||
)
|
||||
if cursor.rowcount == 0:
|
||||
if not start_fixing(conn, pr_number):
|
||||
return {"pr": pr_number, "skipped": True, "reason": "not_open"}
|
||||
|
||||
# Increment fix_attempts
|
||||
conn.execute(
|
||||
"UPDATE prs SET fix_attempts = COALESCE(fix_attempts, 0) + 1 WHERE number = ?",
|
||||
(pr_number,),
|
||||
)
|
||||
|
||||
# Get PR branch from DB first, fall back to Forgejo API
|
||||
row = conn.execute("SELECT branch FROM prs WHERE number = ?", (pr_number,)).fetchone()
|
||||
branch = row["branch"] if row and row["branch"] else None
|
||||
|
|
@ -177,18 +168,7 @@ async def _fix_wiki_links_in_pr(conn, pr_number: int) -> dict:
|
|||
# Reset eval state BEFORE push — if daemon crashes between push and
|
||||
# reset, the PR would be permanently stuck at max eval_attempts.
|
||||
# Reset-first: worst case is one wasted eval cycle on old content.
|
||||
conn.execute(
|
||||
"""UPDATE prs SET
|
||||
status = 'open',
|
||||
eval_attempts = 0,
|
||||
eval_issues = '[]',
|
||||
tier0_pass = NULL,
|
||||
domain_verdict = 'pending',
|
||||
leo_verdict = 'pending',
|
||||
last_error = NULL
|
||||
WHERE number = ?""",
|
||||
(pr_number,),
|
||||
)
|
||||
reset_for_reeval(conn, pr_number)
|
||||
|
||||
rc, out = await _git("push", "origin", branch, cwd=worktree_path, timeout=30)
|
||||
if rc != 0:
|
||||
|
|
@ -225,15 +205,29 @@ async def fix_cycle(conn, max_workers=None) -> tuple[int, int]:
|
|||
# Garbage collection: close PRs with exhausted fix budget that are stuck in open.
|
||||
# These were evaluated, rejected, fixer couldn't help, nobody closes them.
|
||||
# (Epimetheus session 2 — prevents zombie PR accumulation)
|
||||
_gc = conn.execute(
|
||||
"""UPDATE prs SET status = 'closed', last_error = 'fix budget exhausted — auto-closed'
|
||||
# Bug fix: must also close on Forgejo + delete branch, not just DB update.
|
||||
# DB-only close caused Forgejo/DB state divergence — branches stayed alive,
|
||||
# blocking Gate 2 in batch-extract for 5 days. (Epimetheus session 4)
|
||||
gc_rows = conn.execute(
|
||||
"""SELECT number, branch FROM prs
|
||||
WHERE status = 'open'
|
||||
AND fix_attempts >= ?
|
||||
AND (domain_verdict = 'request_changes' OR leo_verdict = 'request_changes')""",
|
||||
(config.MAX_FIX_ATTEMPTS + 2,), # GC threshold = mechanical + substantive budget
|
||||
)
|
||||
if _gc.rowcount > 0:
|
||||
logger.info("GC: closed %d exhausted PRs", _gc.rowcount)
|
||||
(config.MAX_FIX_ATTEMPTS + 2,),
|
||||
).fetchall()
|
||||
if gc_rows:
|
||||
from .forgejo import api as _gc_forgejo, repo_path as _gc_repo_path
|
||||
for row in gc_rows:
|
||||
pr_num, branch = row["number"], row["branch"]
|
||||
try:
|
||||
await _gc_forgejo("POST", _gc_repo_path(f"issues/{pr_num}/comments"),
|
||||
{"body": "Auto-closed: fix budget exhausted. Source will be re-extracted."})
|
||||
await close_pr(conn, pr_num, last_error='fix budget exhausted — auto-closed')
|
||||
if branch:
|
||||
await _gc_forgejo("DELETE", _gc_repo_path(f"branches/{branch}"))
|
||||
except Exception as e:
|
||||
logger.warning("GC: failed to close PR #%d on Forgejo: %s", pr_num, e)
|
||||
logger.info("GC: closed %d exhausted PRs (DB + Forgejo + branch cleanup)", len(gc_rows))
|
||||
|
||||
batch_limit = min(max_workers or config.MAX_FIX_PER_CYCLE, config.MAX_FIX_PER_CYCLE)
|
||||
|
||||
|
|
|
|||
142
lib/frontmatter.py
Normal file
142
lib/frontmatter.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
"""Pure YAML frontmatter parsing and serialization for claim/entity files.
|
||||
|
||||
Shared by merge (reweave merge, reciprocal edges) and reweave scripts.
|
||||
All functions are pure — zero I/O, zero async, zero DB.
|
||||
|
||||
Extracted from merge.py Phase 6 of decomposition (Ganymede-approved plan).
|
||||
"""
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def _yaml_quote(value: str) -> str:
    """Render a value for use as a YAML block-list item, quoting it when needed.

    The value is returned unchanged when it is safe as a plain scalar. It is
    wrapped in double quotes when it contains ``:`` or `` #`` (which would start
    a mapping or a comment) or when it begins with a YAML indicator character.

    Inside the double-quoted form, backslashes are escaped before double quotes:
    in a double-quoted YAML scalar a backslash starts an escape sequence, so
    leaving it raw would corrupt values such as ``a:\\b`` on the next parse.

    Args:
        value: The edge value; non-strings are converted with ``str()``.

    Returns:
        The plain or double-quoted text, without the leading ``- ``.
    """
    s = str(value)
    # Leading characters that have syntactic meaning at the start of a scalar.
    indicator_starts = ("{", "[", "'", '"', "*", "&", "!", "|", ">", "#", "%", "@", "`")
    needs_quotes = ":" in s or " #" in s or s.startswith(indicator_starts)
    if not needs_quotes:
        return s
    # Order matters: escape backslashes first so the ones added for quotes survive.
    escaped = s.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'
|
||||
|
||||
|
||||
# Frontmatter keys that hold edges to other claims.
# The tuple order is significant: serialize_edge_fields appends fields that were
# missing from the original frontmatter in exactly this order.
REWEAVE_EDGE_FIELDS = (
    "supports",
    "challenges",
    "challenged_by",
    "depends_on",
    "related",
    "reweave_edges",
)

# Maps an edge type on claim A (pointing at B) to the edge type B receives
# pointing back at A.
RECIPROCAL_EDGE_MAP = {
    # Treated as approximately symmetric: A supports B -> B supports A.
    "supports": "supports",
    # Directional: A challenges B -> B is challenged_by A.
    "challenges": "challenged_by",
    "related": "related",
    # Not symmetric: A depends_on B -> B is merely related to A.
    "depends_on": "related",
}
|
||||
|
||||
|
||||
def parse_yaml_frontmatter(text: str) -> tuple[dict | None, str, str]:
|
||||
"""Parse YAML frontmatter from markdown text.
|
||||
|
||||
Returns (frontmatter_dict, raw_fm_text, body_text_including_closing_delimiter).
|
||||
Returns (None, "", text) if no valid frontmatter found.
|
||||
raw_fm_text is the text between the --- delimiters (no delimiters, no leading newline).
|
||||
"""
|
||||
if not text.startswith("---"):
|
||||
return None, "", text
|
||||
end = text.find("\n---", 3)
|
||||
if end == -1:
|
||||
return None, "", text
|
||||
try:
|
||||
raw_fm_text = text[4:end] # skip "---\n", stop before "\n---"
|
||||
fm = yaml.safe_load(raw_fm_text)
|
||||
body = text[end:] # includes closing \n--- and body
|
||||
return (fm if isinstance(fm, dict) else None), raw_fm_text, body
|
||||
except Exception:
|
||||
return None, "", text
|
||||
|
||||
|
||||
def union_edge_lists(main_edges: list, branch_edges: list) -> list:
    """Merge two edge lists into one without duplicates.

    Edges are compared by their stripped, lower-cased string form. Everything
    from ``main_edges`` comes first in its original order; edges that only
    appear in ``branch_edges`` follow, also in their original order. The first
    spelling encountered for a given key is the one that is kept.
    """
    merged = []
    known_keys = set()
    for source in (main_edges, branch_edges):
        for candidate in source:
            normalized = str(candidate).strip().lower()
            if normalized in known_keys:
                continue
            known_keys.add(normalized)
            merged.append(candidate)
    return merged
|
||||
|
||||
|
||||
def serialize_edge_fields(raw_fm_text: str, merged_edges: dict[str, list]) -> str:
    """Rewrite only the edge fields of raw frontmatter text.

    Lines that do not belong to a field in REWEAVE_EDGE_FIELDS are copied
    through untouched, so the rest of the frontmatter is not reformatted.

    Args:
        raw_fm_text: YAML text between the ``---`` delimiters (delimiters excluded).
        merged_edges: ``{field_name: [edge_values]}``. A field that is missing
            or has an empty list is removed from the output.

    Returns:
        The frontmatter text with each edge field written as a block list.
        Fields that existed keep their position; fields that did not exist are
        appended at the end in REWEAVE_EDGE_FIELDS order.
    """

    def render(field_name: str) -> list[str]:
        # Produce the "field:" header plus one "- value" line per edge.
        values = merged_edges.get(field_name, [])
        if not values:
            return []
        return [f"{field_name}:"] + [f"- {_yaml_quote(v)}" for v in values]

    source_lines = raw_fm_text.split("\n")
    total = len(source_lines)
    output = []
    handled = set()
    pos = 0

    while pos < total:
        current = source_lines[pos]
        pos += 1

        field_name = None
        for candidate in REWEAVE_EDGE_FIELDS:
            if current.startswith(f"{candidate}:"):
                field_name = candidate
                break

        if field_name is None:
            output.append(current)
            continue

        handled.add(field_name)
        # Drop the old list items: non-empty lines starting with a space or "-".
        while pos < total and source_lines[pos] and source_lines[pos][0] in (" ", "-"):
            pos += 1
        output.extend(render(field_name))

    # Edge fields that were absent from the original text go at the end.
    for field_name in REWEAVE_EDGE_FIELDS:
        if field_name not in handled:
            output.extend(render(field_name))

    return "\n".join(output)
|
||||
|
||||
|
||||
def serialize_frontmatter(raw_fm_text: str, merged_edges: dict[str, list], body: str) -> str:
    """Reassemble a markdown file from raw frontmatter, merged edges, and body.

    The frontmatter is produced by serialize_edge_fields, so only edge fields
    change. ``body`` is expected to begin with the closing delimiter; when it
    does not already start with a newline, one is inserted so the delimiter
    lands on its own line.
    """
    frontmatter = serialize_edge_fields(raw_fm_text, merged_edges)
    separator = "" if body.startswith("\n") else "\n"
    return "---\n" + frontmatter + separator + body
|
||||
187
lib/github_feedback.py
Normal file
187
lib/github_feedback.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
"""GitHub PR feedback — posts pipeline status to GitHub PRs for external contributors.
|
||||
|
||||
Three touchpoints:
|
||||
1. Discovery ack: when pipeline discovers a mirrored PR
|
||||
2. Eval review: when evaluation completes (approved or rejected with reasoning)
|
||||
3. Merge/close outcome: when PR is merged or permanently closed
|
||||
|
||||
Only fires for PRs with a github_pr link (set by sync-mirror.sh).
|
||||
All calls are non-fatal — GitHub feedback never blocks the pipeline.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
|
||||
from . import config
|
||||
|
||||
logger = logging.getLogger("pipeline.github_feedback")

# Base URL of the GitHub REST API and the repository feedback is posted to.
GITHUB_API = "https://api.github.com"
GITHUB_REPO = "living-ip/teleo-codex"

# Account names treated as bots. NOTE(review): not referenced by any function
# visible in this module chunk — confirm where it is consumed.
_BOT_ACCOUNTS = frozenset(
    {
        "m3taversal",
        "teleo-bot",
        "teleo",
        "github-actions[bot]",
    }
)
|
||||
|
||||
|
||||
def _github_pat() -> str | None:
    """Return the GitHub personal access token, or None when none is configured.

    The ``github-pat`` file under ``config.SECRETS_DIR`` takes precedence; its
    contents are returned with surrounding whitespace stripped. When that file
    does not exist, the ``GITHUB_PAT`` environment variable is used instead.
    """
    pat_file = config.SECRETS_DIR / "github-pat"
    if not pat_file.exists():
        return os.environ.get("GITHUB_PAT")
    return pat_file.read_text().strip()
|
||||
|
||||
|
||||
async def _post_comment(github_pr: int, body: str) -> bool:
    """Post ``body`` as a comment on the given GitHub PR.

    Never raises: a missing token, an HTTP status of 400 or above, and any
    exception during the request are all logged and reported as ``False``.

    Returns:
        True when the comment request completed with a status below 400.
    """
    token = _github_pat()
    if not token:
        logger.warning("No GitHub PAT — skipping feedback for GH PR #%d", github_pr)
        return False

    endpoint = f"{GITHUB_API}/repos/{GITHUB_REPO}/issues/{github_pr}/comments"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    try:
        async with aiohttp.ClientSession() as http, http.post(
            endpoint,
            headers=request_headers,
            json={"body": body},
            timeout=aiohttp.ClientTimeout(total=30),
        ) as response:
            if response.status < 400:
                logger.info("GitHub comment posted on PR #%d", github_pr)
                return True
            # Keep only the start of the error payload in the log line.
            detail = await response.text()
            logger.error("GitHub comment on PR #%d failed: %d %s", github_pr, response.status, detail[:200])
            return False
    except Exception:
        logger.exception("GitHub comment on PR #%d failed", github_pr)
        return False
|
||||
|
||||
|
||||
async def _close_github_pr(github_pr: int) -> bool:
    """Set the given GitHub PR's state to ``closed``.

    Never raises: a missing token silently yields ``False``; an HTTP status of
    400 or above and any exception during the request are logged and yield
    ``False``.

    Returns:
        True when the PATCH request completed with a status below 400.
    """
    token = _github_pat()
    if not token:
        return False

    endpoint = f"{GITHUB_API}/repos/{GITHUB_REPO}/pulls/{github_pr}"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    try:
        async with aiohttp.ClientSession() as http, http.patch(
            endpoint,
            headers=request_headers,
            json={"state": "closed"},
            timeout=aiohttp.ClientTimeout(total=30),
        ) as response:
            if response.status < 400:
                logger.info("GitHub PR #%d closed", github_pr)
                return True
            # Keep only the start of the error payload in the log line.
            detail = await response.text()
            logger.error("GitHub close PR #%d failed: %d %s", github_pr, response.status, detail[:200])
            return False
    except Exception:
        logger.exception("GitHub close PR #%d failed", github_pr)
        return False
|
||||
|
||||
|
||||
def _get_github_pr(conn, forgejo_pr: int) -> int | None:
|
||||
row = conn.execute(
|
||||
"SELECT github_pr FROM prs WHERE number = ? AND github_pr IS NOT NULL",
|
||||
(forgejo_pr,),
|
||||
).fetchone()
|
||||
return row["github_pr"] if row else None
|
||||
|
||||
|
||||
async def on_discovery(conn, forgejo_pr: int):
    """Acknowledge on GitHub that the pipeline has picked up a PR.

    Does nothing when the Forgejo PR has no linked GitHub PR.
    """
    gh_pr = _get_github_pr(conn, forgejo_pr)
    if gh_pr:
        paragraphs = [
            "Your contribution has been received by the Teleo evaluation pipeline. "
            "It's queued for automated review (priority: high).",
            "You'll receive updates here as it progresses through evaluation.",
            "_Automated message from the [LivingIP](https://livingip.xyz) pipeline._",
        ]
        await _post_comment(gh_pr, "\n\n".join(paragraphs))
|
||||
|
||||
|
||||
async def on_eval_complete(
    conn,
    forgejo_pr: int,
    *,
    outcome: str,
    review_text: str | None = None,
    issues: list[str] | None = None,
):
    """Post the evaluation result as a comment on the linked GitHub PR.

    Does nothing when the Forgejo PR has no linked GitHub PR.

    Args:
        conn: Database connection used to resolve the GitHub PR number.
        forgejo_pr: Forgejo PR number.
        outcome: ``"approved"`` and ``"rejected"`` get dedicated layouts (the
            latter is headed "Changes Requested"); any other value is shown
            verbatim in the heading followed by the review text.
        review_text: Optional reviewer output; at most the first 3000
            characters are included.
        issues: Optional issue tags, listed as bullets for rejected PRs.
    """
    gh_pr = _get_github_pr(conn, forgejo_pr)
    if not gh_pr:
        return

    def collapsible(summary: str) -> str:
        # Escape any closing tag in the review so it cannot terminate the
        # <details> wrapper early and spill the rest of the text.
        safe_text = review_text[:3000].replace("</details>", "&lt;/details&gt;")
        return f"<details>\n<summary>{summary}</summary>\n\n{safe_text}\n\n</details>"

    if outcome == "approved":
        body = "**Evaluation: Approved**\n\nYour contribution passed automated review and is queued for merge."
        if review_text:
            body += "\n\n" + collapsible("Review details")
    elif outcome == "rejected":
        body = "**Evaluation: Changes Requested**\n\n"
        if issues:
            body += "Issues found:\n"
            body += "".join(f"- {issue}\n" for issue in issues)
        if review_text:
            body += "\n" + collapsible("Full review")
        body += (
            "\n\nThe pipeline will attempt automated fixes where possible. "
            "If fixes fail, the PR will be closed — you're welcome to resubmit."
        )
    else:
        body = f"**Evaluation: {outcome}**\n\n"
        if review_text:
            body += review_text[:3000]

    body += "\n\n_Automated message from the [LivingIP](https://livingip.xyz) pipeline._"
    await _post_comment(gh_pr, body)
|
||||
|
||||
|
||||
async def on_merged(conn, forgejo_pr: int, *, claims_count: int = None):
    """Announce a merge on the linked GitHub PR, then close that PR.

    Does nothing when the Forgejo PR has no linked GitHub PR. A positive
    ``claims_count`` is mentioned in the message.
    """
    gh_pr = _get_github_pr(conn, forgejo_pr)
    if not gh_pr:
        return

    claims_note = ""
    if claims_count and claims_count > 0:
        plural = "s" if claims_count != 1 else ""
        claims_note = f" ({claims_count} claim{plural} added)"

    message = "".join(
        [
            "**Merged!** Your contribution has been merged into the knowledge base.",
            claims_note,
            "\n\nThank you for contributing to LivingIP. ",
            "Your attribution has been recorded.\n\n",
            "_Automated message from the [LivingIP](https://livingip.xyz) pipeline._",
        ]
    )
    await _post_comment(gh_pr, message)
    await _close_github_pr(gh_pr)
|
||||
|
||||
|
||||
async def on_closed(conn, forgejo_pr: int, *, reason: str = None):
    """Announce a closure on the linked GitHub PR, then close that PR.

    Does nothing when the Forgejo PR has no linked GitHub PR. ``reason`` is
    shown when given; otherwise a generic sentence is used.
    """
    gh_pr = _get_github_pr(conn, forgejo_pr)
    if not gh_pr:
        return

    explanation = reason if reason else "This PR was closed after evaluation."
    message = (
        "**Closed.** "
        + explanation
        + "\n\nYou're welcome to resubmit with changes. "
        + "See the evaluation feedback above for guidance.\n\n"
        + "_Automated message from the [LivingIP](https://livingip.xyz) pipeline._"
    )
    await _post_comment(gh_pr, message)
    await _close_github_pr(gh_pr)
|
||||
118
lib/health.py
118
lib/health.py
|
|
@ -11,6 +11,7 @@ from . import config, costs, db
|
|||
from .analytics import get_snapshot_history, get_version_changes
|
||||
from .claim_index import build_claim_index, write_claim_index
|
||||
from .feedback import get_agent_error_patterns, get_all_agent_patterns
|
||||
from .search import check_duplicate
|
||||
|
||||
logger = logging.getLogger("pipeline.health")
|
||||
|
||||
|
|
@ -307,6 +308,121 @@ async def handle_metrics(request):
|
|||
})
|
||||
|
||||
|
||||
def pr_status(conn, pr_number: int | None = None, branch: str | None = None) -> dict:
|
||||
"""Get PR status for agent consumption.
|
||||
|
||||
Look up by PR number or branch name. Returns state, eval verdicts,
|
||||
merge status, time in queue, and rejection reasons.
|
||||
|
||||
Args:
|
||||
conn: SQLite connection with row_factory=sqlite3.Row
|
||||
pr_number: PR number to look up
|
||||
branch: Branch name to look up (fallback if no pr_number)
|
||||
|
||||
Returns dict with PR state or {"error": "not_found"}.
|
||||
"""
|
||||
if pr_number is not None:
|
||||
row = conn.execute(
|
||||
"""SELECT number, branch, source_path, status, domain, agent,
|
||||
commit_type, tier, leo_verdict, domain_verdict,
|
||||
domain_agent, eval_issues, priority, origin,
|
||||
cost_usd, created_at, merged_at, last_attempt, last_error,
|
||||
transient_retries, substantive_retries, description
|
||||
FROM prs WHERE number = ?""",
|
||||
(pr_number,),
|
||||
).fetchone()
|
||||
elif branch:
|
||||
row = conn.execute(
|
||||
"""SELECT number, branch, source_path, status, domain, agent,
|
||||
commit_type, tier, leo_verdict, domain_verdict,
|
||||
domain_agent, eval_issues, priority, origin,
|
||||
cost_usd, created_at, merged_at, last_attempt, last_error,
|
||||
transient_retries, substantive_retries, description
|
||||
FROM prs WHERE branch = ?
|
||||
ORDER BY number DESC LIMIT 1""",
|
||||
(branch,),
|
||||
).fetchone()
|
||||
else:
|
||||
return {"error": "pr_number or branch required"}
|
||||
|
||||
if not row:
|
||||
return {"error": "not_found"}
|
||||
|
||||
# Parse eval issues
|
||||
issues = []
|
||||
try:
|
||||
issues = json.loads(row["eval_issues"] or "[]")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
# Time in queue (created → now or merged)
|
||||
time_in_queue_minutes = None
|
||||
if row["created_at"]:
|
||||
try:
|
||||
created = datetime.fromisoformat(row["created_at"])
|
||||
if created.tzinfo is None:
|
||||
created = created.replace(tzinfo=timezone.utc)
|
||||
if row["merged_at"]:
|
||||
end = datetime.fromisoformat(row["merged_at"])
|
||||
if end.tzinfo is None:
|
||||
end = end.replace(tzinfo=timezone.utc)
|
||||
else:
|
||||
end = datetime.now(timezone.utc)
|
||||
time_in_queue_minutes = round((end - created).total_seconds() / 60, 1)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return {
|
||||
"pr": row["number"],
|
||||
"branch": row["branch"],
|
||||
"source": row["source_path"],
|
||||
"status": row["status"],
|
||||
"domain": row["domain"],
|
||||
"agent": row["agent"],
|
||||
"commit_type": row["commit_type"],
|
||||
"tier": row["tier"],
|
||||
"leo_verdict": row["leo_verdict"],
|
||||
"domain_verdict": row["domain_verdict"],
|
||||
"domain_agent": row["domain_agent"],
|
||||
"eval_issues": issues,
|
||||
"priority": row["priority"],
|
||||
"origin": row["origin"],
|
||||
"cost_usd": row["cost_usd"],
|
||||
"created_at": row["created_at"],
|
||||
"merged_at": row["merged_at"],
|
||||
"last_attempt": row["last_attempt"],
|
||||
"last_error": row["last_error"],
|
||||
"retries": {
|
||||
"transient": row["transient_retries"],
|
||||
"substantive": row["substantive_retries"],
|
||||
},
|
||||
"description": row["description"],
|
||||
"time_in_queue_minutes": time_in_queue_minutes,
|
||||
}
|
||||
|
||||
|
||||
async def handle_pr_status(request):
    """Serve GET /pr/{number}: return one PR's status record as JSON.

    Responds 400 when the ``number`` path segment is missing or not an
    integer, 404 when the lookup result carries an ``error`` key, and 200
    with the status payload otherwise.
    """
    db = _conn(request)
    try:
        number = int(request.match_info["number"])
    except (KeyError, ValueError):
        bad_request = {"error": "invalid pr number"}
        return web.json_response(bad_request, status=400)

    payload = pr_status(db, pr_number=number)
    # Any error reported by the lookup is surfaced to the client as 404.
    if "error" in payload:
        return web.json_response(payload, status=404)
    return web.json_response(payload, status=200)
|
||||
|
||||
|
||||
async def handle_check_duplicate(request):
    """Serve GET /check-duplicate?text=...&domain=... (near-duplicate lookup).

    ``text`` is mandatory; a missing or empty value yields a 400 response.
    ``domain`` is optional and is forwarded as ``None`` when absent.
    """
    params = request.query
    text = params.get("text", "")
    if text:
        matches = check_duplicate(text, domain=params.get("domain"))
        return web.json_response(matches)
    return web.json_response({"error": "text parameter required"}, status=400)
|
||||
|
||||
|
||||
async def handle_activity(request):
|
||||
"""GET /activity — condensed PR activity feed (Rhea).
|
||||
|
||||
|
|
@ -688,6 +804,8 @@ def create_app() -> web.Application:
|
|||
app.router.add_get("/contributors", handle_contributors_list)
|
||||
app.router.add_get("/", handle_dashboard)
|
||||
app.router.add_get("/activity", handle_activity)
|
||||
app.router.add_get("/pr/{number}", handle_pr_status)
|
||||
app.router.add_get("/check-duplicate", handle_check_duplicate)
|
||||
app.router.add_get("/calibration", handle_calibration)
|
||||
app.router.add_get("/feedback/{agent}", handle_feedback)
|
||||
app.router.add_get("/feedback", handle_feedback_all)
|
||||
|
|
|
|||
114
lib/llm.py
114
lib/llm.py
|
|
@ -10,6 +10,7 @@ Orchestration (PR lifecycle, SQLite state, Forgejo posting) stays in evaluate.py
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
|
||||
import aiohttp
|
||||
|
|
@ -224,12 +225,16 @@ where NUMBER is the PR number shown in the section header."""
|
|||
|
||||
async def openrouter_call(
|
||||
model: str, prompt: str, timeout_sec: int = 120, max_tokens: int = 4096,
|
||||
) -> str | None:
|
||||
"""Call OpenRouter API. Returns response text or None on failure."""
|
||||
) -> tuple[str | None, dict]:
|
||||
"""Call OpenRouter API. Returns (response_text, usage_dict).
|
||||
|
||||
usage_dict has keys: prompt_tokens, completion_tokens (0 on failure).
|
||||
"""
|
||||
empty_usage = {"prompt_tokens": 0, "completion_tokens": 0}
|
||||
key_file = config.SECRETS_DIR / "openrouter-key"
|
||||
if not key_file.exists():
|
||||
logger.error("OpenRouter key file not found")
|
||||
return None
|
||||
return None, empty_usage
|
||||
key = key_file.read_text().strip()
|
||||
|
||||
payload = {
|
||||
|
|
@ -250,23 +255,37 @@ async def openrouter_call(
|
|||
if resp.status >= 400:
|
||||
text = await resp.text()
|
||||
logger.error("OpenRouter %s → %d: %s", model, resp.status, text[:200])
|
||||
return None
|
||||
return None, empty_usage
|
||||
data = await resp.json()
|
||||
return data.get("choices", [{}])[0].get("message", {}).get("content")
|
||||
usage = data.get("usage", empty_usage)
|
||||
content = data.get("choices", [{}])[0].get("message", {}).get("content")
|
||||
return content, usage
|
||||
except Exception as e:
|
||||
logger.error("OpenRouter error: %s → %s", model, e)
|
||||
return None
|
||||
return None, empty_usage
|
||||
|
||||
|
||||
async def claude_cli_call(model: str, prompt: str, timeout_sec: int = 600, cwd: str = None) -> str | None:
|
||||
"""Call Claude via CLI (Claude Max subscription). Returns response or None."""
|
||||
async def claude_cli_call(model: str, prompt: str, timeout_sec: int = 600, cwd: str = None) -> tuple[str | None, dict]:
|
||||
"""Call Claude via CLI (Claude Max subscription). Returns (response, usage).
|
||||
|
||||
Uses --output-format json to capture token usage. Subscription calls cost $0
|
||||
but tokens are tracked for compute metrics (Cory: capture tokens/time, note subscription).
|
||||
"""
|
||||
empty_usage = {
|
||||
"prompt_tokens": 0, "completion_tokens": 0,
|
||||
"cache_read_tokens": 0, "cache_write_tokens": 0,
|
||||
"duration_ms": 0, "duration_api_ms": 0,
|
||||
"cost_estimate_usd": 0.0,
|
||||
"stop_reason": "", "num_turns": 0,
|
||||
"service_tier": "", "speed": "",
|
||||
}
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
str(config.CLAUDE_CLI),
|
||||
"-p",
|
||||
"--model",
|
||||
model,
|
||||
"--output-format",
|
||||
"text",
|
||||
"json",
|
||||
cwd=cwd or str(config.REPO_DIR),
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
|
|
@ -282,7 +301,7 @@ async def claude_cli_call(model: str, prompt: str, timeout_sec: int = 600, cwd:
|
|||
proc.kill()
|
||||
await proc.wait()
|
||||
logger.error("Claude CLI timed out after %ds", timeout_sec)
|
||||
return None
|
||||
return None, empty_usage
|
||||
finally:
|
||||
_active_subprocesses.discard(proc)
|
||||
|
||||
|
|
@ -293,43 +312,66 @@ async def claude_cli_call(model: str, prompt: str, timeout_sec: int = 600, cwd:
|
|||
combined_lower = (out_text + err_text).lower()
|
||||
if "hit your limit" in combined_lower or "rate limit" in combined_lower:
|
||||
logger.warning("Claude Max rate limited (rc=%d, stdout: %s)", proc.returncode, out_text[:200])
|
||||
return "RATE_LIMITED"
|
||||
return "RATE_LIMITED", empty_usage
|
||||
|
||||
if proc.returncode != 0:
|
||||
logger.error("Claude CLI failed (rc=%d): stderr=%s stdout=%s", proc.returncode, err_text[:200], out_text[:200])
|
||||
return None
|
||||
return None, empty_usage
|
||||
|
||||
return out_text.strip()
|
||||
# Parse JSON output to extract full usage telemetry
|
||||
usage = empty_usage.copy()
|
||||
try:
|
||||
data = json.loads(out_text)
|
||||
text = data.get("result", "")
|
||||
raw_usage = data.get("usage", {})
|
||||
usage = {
|
||||
"prompt_tokens": raw_usage.get("input_tokens", 0),
|
||||
"completion_tokens": raw_usage.get("output_tokens", 0),
|
||||
"cache_read_tokens": raw_usage.get("cache_read_input_tokens", 0),
|
||||
"cache_write_tokens": raw_usage.get("cache_creation_input_tokens", 0),
|
||||
"duration_ms": data.get("duration_ms", 0),
|
||||
"duration_api_ms": data.get("duration_api_ms", 0),
|
||||
"cost_estimate_usd": data.get("total_cost_usd", 0.0),
|
||||
"stop_reason": data.get("stop_reason", ""),
|
||||
"num_turns": data.get("num_turns", 0),
|
||||
"service_tier": raw_usage.get("service_tier", ""),
|
||||
"speed": raw_usage.get("speed", ""),
|
||||
}
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
logger.warning("Claude CLI returned non-JSON output, token tracking unavailable")
|
||||
text = out_text.strip()
|
||||
|
||||
return text, usage
|
||||
|
||||
|
||||
# ─── Review execution ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def triage_pr(diff: str) -> str:
|
||||
"""Triage PR via Haiku → DEEP/STANDARD/LIGHT."""
|
||||
async def triage_pr(diff: str) -> tuple[str, dict, str]:
|
||||
"""Triage PR via Haiku → (tier, usage, reason). tier is DEEP/STANDARD/LIGHT."""
|
||||
prompt = TRIAGE_PROMPT.format(diff=diff[:50000]) # Cap diff size for triage
|
||||
result = await openrouter_call(config.TRIAGE_MODEL, prompt, timeout_sec=30)
|
||||
result, usage = await openrouter_call(config.TRIAGE_MODEL, prompt, timeout_sec=30)
|
||||
if not result:
|
||||
logger.warning("Triage failed, defaulting to STANDARD")
|
||||
return "STANDARD"
|
||||
return "STANDARD", usage, "triage failed, default"
|
||||
|
||||
tier = result.split("\n")[0].strip().upper()
|
||||
if tier in ("DEEP", "STANDARD", "LIGHT"):
|
||||
reason = result.split("\n")[1].strip() if "\n" in result else ""
|
||||
logger.info("Triage: %s — %s", tier, reason[:100])
|
||||
return tier
|
||||
return tier, usage, reason[:500]
|
||||
|
||||
logger.warning("Triage returned unparseable '%s', defaulting to STANDARD", tier[:20])
|
||||
return "STANDARD"
|
||||
return "STANDARD", usage, f"unparseable response, default (got: {tier[:20]})"
|
||||
|
||||
|
||||
async def run_batch_domain_review(
|
||||
pr_diffs: list[dict], domain: str, agent: str,
|
||||
) -> str | None:
|
||||
) -> tuple[str | None, dict]:
|
||||
"""Run batched domain review for multiple PRs in one LLM call.
|
||||
|
||||
pr_diffs: list of {"number": int, "label": str, "diff": str, "files": str}
|
||||
Returns raw response text or None on failure.
|
||||
Returns (raw_response_text, usage) or (None, usage) on failure.
|
||||
"""
|
||||
# Build per-PR sections with anchoring labels
|
||||
sections = []
|
||||
|
|
@ -351,18 +393,19 @@ async def run_batch_domain_review(
|
|||
|
||||
# Scale max_tokens with batch size: ~3K tokens per PR review
|
||||
max_tokens = min(3000 * len(pr_diffs), 16384)
|
||||
result = await openrouter_call(
|
||||
result, usage = await openrouter_call(
|
||||
config.EVAL_DOMAIN_MODEL, prompt,
|
||||
timeout_sec=config.EVAL_TIMEOUT, max_tokens=max_tokens,
|
||||
)
|
||||
return result
|
||||
return result, usage
|
||||
|
||||
|
||||
async def run_domain_review(diff: str, files: str, domain: str, agent: str) -> str | None:
|
||||
"""Run domain review via OpenRouter GPT-4o.
|
||||
async def run_domain_review(diff: str, files: str, domain: str, agent: str) -> tuple[str | None, dict]:
|
||||
"""Run domain review via OpenRouter.
|
||||
|
||||
Decoupled from Claude Max to avoid account-level rate limits blocking
|
||||
domain reviews. Different model lineage also reduces correlated blind spots.
|
||||
Returns (review_text, usage).
|
||||
"""
|
||||
prompt = DOMAIN_PROMPT.format(
|
||||
agent=agent,
|
||||
|
|
@ -373,16 +416,17 @@ async def run_domain_review(diff: str, files: str, domain: str, agent: str) -> s
|
|||
files=files,
|
||||
)
|
||||
|
||||
result = await openrouter_call(config.EVAL_DOMAIN_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result
|
||||
result, usage = await openrouter_call(config.EVAL_DOMAIN_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result, usage
|
||||
|
||||
|
||||
async def run_leo_review(diff: str, files: str, tier: str) -> str | None:
|
||||
async def run_leo_review(diff: str, files: str, tier: str) -> tuple[str | None, dict]:
|
||||
"""Run Leo review. DEEP → Opus (Claude Max, queue if limited). STANDARD → GPT-4o (OpenRouter).
|
||||
|
||||
Opus is scarce — reserved for DEEP eval and overnight research sessions.
|
||||
STANDARD goes straight to GPT-4o. Domain review is the primary gate;
|
||||
Leo review is a quality check that doesn't need Opus for routine claims.
|
||||
Returns (review_text, usage).
|
||||
"""
|
||||
prompt_template = LEO_PROMPT_DEEP if tier == "DEEP" else LEO_PROMPT_STANDARD
|
||||
prompt = prompt_template.format(style_guide=REVIEW_STYLE_GUIDE, diff=diff, files=files)
|
||||
|
|
@ -394,14 +438,14 @@ async def run_leo_review(diff: str, files: str, tier: str) -> str | None:
|
|||
# (Cory, Mar 14: "yes lets skip opus")
|
||||
#
|
||||
# --- Re-enable Opus later (uses EVAL_TIMEOUT_OPUS for longer reasoning): ---
|
||||
# result = await claude_cli_call(config.EVAL_LEO_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||
# result, usage = await claude_cli_call(config.EVAL_LEO_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||
# if result == "RATE_LIMITED" or result is None:
|
||||
# logger.info("Opus unavailable for DEEP Leo review — overflowing to Sonnet")
|
||||
# result = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||
# return result
|
||||
result = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result
|
||||
# result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT_OPUS)
|
||||
# return result, usage
|
||||
result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result, usage
|
||||
else:
|
||||
# STANDARD/LIGHT: Sonnet via OpenRouter — 120s timeout (routine calls)
|
||||
result = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result
|
||||
result, usage = await openrouter_call(config.EVAL_LEO_STANDARD_MODEL, prompt, timeout_sec=config.EVAL_TIMEOUT)
|
||||
return result, usage
|
||||
|
|
|
|||
937
lib/merge.py
937
lib/merge.py
File diff suppressed because it is too large
Load diff
|
|
@ -163,15 +163,29 @@ def fix_frontmatter(content: str, domain: str, agent: str) -> tuple[str, list[st
|
|||
|
||||
|
||||
def fix_wiki_links(content: str, existing_claims: set[str]) -> tuple[str, list[str]]:
|
||||
"""Strip brackets from broken wiki links, keeping the text. Returns (fixed_content, fixes)."""
|
||||
"""Fix or strip broken wiki links. Resolves slug→space mismatches before stripping.
|
||||
|
||||
The LLM often generates wiki links as slugs (hyphens) but KB filenames use spaces.
|
||||
Try normalizing hyphens→spaces before giving up and stripping brackets.
|
||||
"""
|
||||
fixes = []
|
||||
# Build a lookup: normalized (lowercased, hyphens→spaces) → original stem
|
||||
_normalized_lookup: dict[str, str] = {}
|
||||
for stem in existing_claims:
|
||||
_normalized_lookup[stem.lower().replace("-", " ")] = stem
|
||||
|
||||
def replace_broken(match):
|
||||
link = match.group(1).strip()
|
||||
if link not in existing_claims:
|
||||
fixes.append(f"stripped_wiki_link:{link[:60]}")
|
||||
return link # Keep text, remove brackets
|
||||
return match.group(0)
|
||||
if link in existing_claims:
|
||||
return match.group(0) # Exact match — keep as-is
|
||||
# Try normalizing slug to spaces
|
||||
normalized = link.lower().replace("-", " ")
|
||||
if normalized in _normalized_lookup:
|
||||
resolved = _normalized_lookup[normalized]
|
||||
fixes.append(f"resolved_wiki_link:{link[:40]}->{resolved[:40]}")
|
||||
return f"[[{resolved}]]"
|
||||
fixes.append(f"stripped_wiki_link:{link[:60]}")
|
||||
return link # Keep text, remove brackets
|
||||
|
||||
fixed = WIKI_LINK_RE.sub(replace_broken, content)
|
||||
return fixed, fixes
|
||||
|
|
@ -212,7 +226,7 @@ def fix_h1_title_match(content: str, filename: str) -> tuple[str, list[str]]:
|
|||
# ─── Validators (check without modifying, return issues) ──────────────────
|
||||
|
||||
|
||||
def validate_claim(filename: str, content: str, existing_claims: set[str]) -> list[str]:
|
||||
def validate_claim(filename: str, content: str, existing_claims: set[str], agent: str | None = None) -> list[str]:
|
||||
"""Validate a claim file. Returns list of issues (empty = pass)."""
|
||||
issues = []
|
||||
fm, body = parse_frontmatter(content)
|
||||
|
|
@ -271,7 +285,7 @@ def validate_claim(filename: str, content: str, existing_claims: set[str]) -> li
|
|||
# Attribution check: extractor must be identified. (Leo: block extractor, warn sourcer)
|
||||
if ftype == "claim":
|
||||
from .attribution import validate_attribution
|
||||
issues.extend(validate_attribution(fm))
|
||||
issues.extend(validate_attribution(fm, agent=agent))
|
||||
|
||||
# OPSEC check: flag claims containing dollar amounts + internal entity references.
|
||||
# Rio's rule: never extract LivingIP/Teleo deal terms to public codex. (Ganymede review)
|
||||
|
|
@ -358,7 +372,7 @@ def validate_and_fix_claims(
|
|||
all_fixes.extend([f"{filename}:{f}" for f in fixes])
|
||||
|
||||
# Phase 2: Validate (after fixes)
|
||||
issues = validate_claim(filename, content, existing_claims)
|
||||
issues = validate_claim(filename, content, existing_claims, agent=agent)
|
||||
|
||||
# Separate hard failures from warnings
|
||||
hard_failures = [i for i in issues if not i.startswith("near_duplicate")]
|
||||
|
|
@ -504,6 +518,24 @@ def _rebuild_content(fm: dict, body: str) -> str:
|
|||
|
||||
def _yaml_line(key: str, val) -> str:
|
||||
"""Format a single YAML key-value line."""
|
||||
if isinstance(val, dict):
|
||||
# Nested YAML block (e.g. attribution with sub-keys)
|
||||
lines = [f"{key}:"]
|
||||
for sub_key, sub_val in val.items():
|
||||
if isinstance(sub_val, list) and sub_val:
|
||||
lines.append(f" {sub_key}:")
|
||||
for item in sub_val:
|
||||
if isinstance(item, dict):
|
||||
first = True
|
||||
for ik, iv in item.items():
|
||||
prefix = " - " if first else " "
|
||||
lines.append(f'{prefix}{ik}: "{iv}"')
|
||||
first = False
|
||||
else:
|
||||
lines.append(f' - "{item}"')
|
||||
else:
|
||||
lines.append(f" {sub_key}: []")
|
||||
return "\n".join(lines)
|
||||
if isinstance(val, list):
|
||||
return f"{key}: {json.dumps(val)}"
|
||||
if isinstance(val, bool):
|
||||
|
|
|
|||
518
lib/post_merge.py
Normal file
518
lib/post_merge.py
Normal file
|
|
@ -0,0 +1,518 @@
|
|||
"""Post-merge effects: embedding, reciprocal edges, source archiving.
|
||||
|
||||
All functions run after a PR is merged to main. Non-fatal failures
|
||||
are logged but do not block the pipeline.
|
||||
|
||||
Extracted from merge.py Phase 6b of decomposition.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from . import config
|
||||
from .frontmatter import (
|
||||
REWEAVE_EDGE_FIELDS,
|
||||
RECIPROCAL_EDGE_MAP,
|
||||
parse_yaml_frontmatter,
|
||||
serialize_edge_fields,
|
||||
)
|
||||
|
||||
try:
|
||||
from .worktree_lock import async_main_worktree_lock
|
||||
except ImportError:
|
||||
from worktree_lock import async_main_worktree_lock
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Accumulates source moves during a merge cycle, batch-committed at the end
|
||||
_pending_source_moves: list[tuple[str, str]] = [] # (queue_path, archive_path)
|
||||
|
||||
|
||||
def update_source_frontmatter_status(path: str, new_status: str):
    """Rewrite the first ``status: ...`` line of a source file in place.

    Only the first line that starts with ``status: `` is replaced; the match
    is made on the whole file text, not on a parsed frontmatter block. If no
    such line exists the file is rewritten unchanged.

    Best-effort: any failure (missing file, permissions, decode error) is
    logged as a warning and swallowed so callers are never interrupted.

    Args:
        path: Filesystem path of the source markdown file.
        new_status: Value to place after ``status: ``; inserted literally.
    """
    try:
        with open(path, encoding="utf-8") as fh:
            text = fh.read()
        # A callable replacement keeps new_status literal; a plain template
        # string would interpret backslashes and group references in it.
        text = re.sub(
            r"^status: .*$",
            lambda _match: f"status: {new_status}",
            text,
            count=1,
            flags=re.MULTILINE,
        )
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(text)
    except Exception as e:
        logger.warning("Failed to update source status in %s: %s", path, e)
|
||||
|
||||
|
||||
async def embed_merged_claims(main_sha: str, branch_sha: str, git_fn: Callable):
    """Embed new/changed claim files from a merged PR into Qdrant.

    Diffs main_sha (pre-merge main HEAD) against branch_sha (merged branch tip)
    to find ALL changed files across the entire branch, not just the last commit.
    Also deletes Qdrant vectors for files removed by the branch.

    Each file is embedded by a separate subprocess with a 30s budget. A file
    that times out is killed and skipped; it does not stop the remaining files
    or the stale-vector cleanup.

    Non-fatal — embedding failure does not block the merge pipeline.

    Args:
        main_sha: Commit of main before the merge.
        branch_sha: Tip commit of the merged branch.
        git_fn: Awaitable git runner; called with git arguments plus ``cwd``
            and ``timeout`` keywords and expected to return ``(rc, output)``.
    """
    try:
        # --- Embed added/changed files ---
        rc, diff_out = await git_fn(
            "diff", "--name-only", "--diff-filter=ACMR",
            main_sha, branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("embed: diff failed (rc=%d), skipping", rc)
            return

        embed_dirs = ("domains/", "core/", "foundations/", "decisions/", "entities/")
        md_files = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(embed_dirs)
            and not f.split("/")[-1].startswith("_")
        ]

        embedded = 0
        for fpath in md_files:
            full_path = config.MAIN_WORKTREE / fpath
            if not full_path.exists():
                continue
            proc = await asyncio.create_subprocess_exec(
                "python3", "/opt/teleo-eval/embed-claims.py", "--file", str(full_path),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            try:
                stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
            except asyncio.TimeoutError:
                # Reap the child so it is not left running, then move on to
                # the next file instead of aborting the whole pass.
                proc.kill()
                await proc.wait()
                logger.warning("embed: timed out for %s", fpath)
                continue
            if proc.returncode == 0 and b"OK" in stdout:
                embedded += 1
            else:
                logger.warning(
                    "embed: failed for %s: %s",
                    fpath, stderr.decode(errors="replace")[:200],
                )

        if embedded:
            logger.info("embed: %d/%d files embedded into Qdrant", embedded, len(md_files))

        # --- Delete vectors for removed files (Ganymede: stale vector cleanup) ---
        rc, del_out = await git_fn(
            "diff", "--name-only", "--diff-filter=D",
            main_sha, branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc == 0 and del_out.strip():
            deleted_files = [
                f for f in del_out.strip().split("\n")
                if f.endswith(".md")
                and f.startswith(embed_dirs)
            ]
            if deleted_files:
                # Point ids are the md5 hex digest of the repo-relative path.
                point_ids = [hashlib.md5(f.encode()).hexdigest() for f in deleted_files]
                try:
                    import urllib.request
                    req = urllib.request.Request(
                        "http://localhost:6333/collections/teleo-claims/points/delete",
                        data=json.dumps({"points": point_ids}).encode(),
                        headers={"Content-Type": "application/json"},
                        method="POST",
                    )
                    # Context manager closes the HTTP response/socket.
                    with urllib.request.urlopen(req, timeout=10):
                        pass
                    logger.info("embed: deleted %d stale vectors from Qdrant", len(point_ids))
                except Exception:
                    logger.warning(
                        "embed: failed to delete stale vectors (non-fatal)",
                        exc_info=True,
                    )
    except Exception:
        logger.exception("embed: post-merge embedding failed (non-fatal)")
|
||||
|
||||
|
||||
def find_claim_file(slug: str) -> Path | None:
    """Find a claim file on disk by its slug. Searches domains/, core/, foundations/.

    The slug is matched literally against ``<slug>.md`` anywhere below each
    search directory of the main worktree. Glob metacharacters in the slug
    (``*``, ``?``, ``[``) are escaped so they cannot match unrelated files.
    Files whose name starts with ``_`` are ignored.

    Args:
        slug: Claim filename without the ``.md`` extension.

    Returns:
        Path of the first matching file, or None when the slug is empty,
        cannot be used as a relative pattern, or no file matches.
    """
    import glob  # local import: only glob.escape is needed here

    if not slug:
        return None

    pattern = f"{glob.escape(slug)}.md"
    worktree = config.MAIN_WORKTREE
    for search_dir in ("domains", "core", "foundations"):
        base = worktree / search_dir
        if not base.is_dir():
            continue
        try:
            for md in base.rglob(pattern):
                if not md.name.startswith("_"):
                    return md
        except (ValueError, NotImplementedError):
            # pathlib rejects empty or non-relative patterns (e.g. a slug
            # starting with "/"); treat such a slug as "not found".
            return None
    return None
|
||||
|
||||
|
||||
def add_edge_to_file(file_path, edge_type: str, target_slug: str) -> bool:
    """Append ``target_slug`` to the ``edge_type`` list in a file's frontmatter.

    Returns True only when the file was rewritten. Returns False when the file
    cannot be read or written, has no parseable frontmatter, or already lists
    the target (compared case-insensitively after stripping whitespace).
    """

    def _listify(value):
        # Frontmatter values may be a scalar string, a list, or anything else.
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            return list(value)
        return []

    try:
        original_text = file_path.read_text()
    except Exception:
        return False

    fm, raw_fm, body = parse_yaml_frontmatter(original_text)
    if fm is None:
        return False

    # Dedup: nothing to do if the edge is already present.
    for entry in _listify(fm.get(edge_type, [])):
        if str(entry).strip().lower() == target_slug.lower():
            return False

    # Carry over every known edge field, then extend only the requested one.
    edges = {name: _listify(fm.get(name, [])) for name in REWEAVE_EDGE_FIELDS}
    edges.setdefault(edge_type, []).append(target_slug)

    # Serialize using the same string-surgery approach as reweave.
    rebuilt_fm = serialize_edge_fields(raw_fm, edges)
    separator = "" if body.startswith("\n") else "\n"
    updated_text = f"---\n{rebuilt_fm}{separator}{body}"

    try:
        file_path.write_text(updated_text)
    except Exception:
        return False
    return True
|
||||
|
||||
|
||||
async def reciprocal_edges(main_sha: str, branch_sha: str, git_fn: Callable):
    """Add reciprocal edges on existing claims after a PR merges.

    When a new claim A lists B under one of its edge fields (``supports``,
    ``challenges``, ``related``), B gets an edge pointing back at A. The type
    of the back-edge is looked up in RECIPROCAL_EDGE_MAP and falls back to
    ``related``. This gives A an incoming link, preventing it from being an
    orphan.

    Runs on main after cherry-pick merge. Non-fatal — orphans are recoverable.
    Only processes new files (diff-filter=A), not modified files. A claim that
    lists itself as a target is ignored.

    Args:
        main_sha: Commit of main before the merge.
        branch_sha: Tip commit of the merged branch.
        git_fn: Awaitable git runner returning ``(rc, output)``.
    """
    EDGE_FIELDS = ("supports", "challenges", "related")

    try:
        # Find newly added claim files
        rc, diff_out = await git_fn(
            "diff", "--name-only", "--diff-filter=A",
            main_sha, branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("reciprocal_edges: diff failed (rc=%d), skipping", rc)
            return

        claim_dirs = ("domains/", "core/", "foundations/")
        new_claims = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(claim_dirs)
            and not f.split("/")[-1].startswith("_")
            and "/entities/" not in f
            and "/decisions/" not in f
        ]

        if not new_claims:
            return

        reciprocals_added = 0
        modified_files = set()
        for claim_path in new_claims:
            full_path = config.MAIN_WORKTREE / claim_path
            if not full_path.exists():
                continue

            try:
                content = full_path.read_text()
            except Exception:
                continue

            fm, _raw_fm, _body = parse_yaml_frontmatter(content)
            if fm is None:
                continue

            # Slug = filename minus the trailing ".md" only. The filter above
            # guarantees the suffix; str.replace would also eat ".md" found
            # in the middle of a name.
            claim_slug = claim_path.rsplit("/", 1)[-1][: -len(".md")]

            # Collect all edge targets from this new claim
            for field in EDGE_FIELDS:
                targets = fm.get(field, [])
                if isinstance(targets, str):
                    targets = [targets]
                if not isinstance(targets, list):
                    continue

                for target_slug in targets:
                    target_slug = str(target_slug).strip()
                    # Skip blanks and self-references (a self-edge adds no
                    # incoming link and would rewrite the new claim itself).
                    if not target_slug or target_slug == claim_slug:
                        continue

                    # Find the target file on disk
                    target_file = find_claim_file(target_slug)
                    if target_file is None:
                        continue

                    # Add the back-edge on the target, pointing at the new claim
                    reciprocal_type = RECIPROCAL_EDGE_MAP.get(field, "related")
                    if add_edge_to_file(target_file, reciprocal_type, claim_slug):
                        reciprocals_added += 1
                        modified_files.add(str(target_file))

        if reciprocals_added > 0:
            # Stage only the files we modified (never git add -A in automation)
            for f in sorted(modified_files):
                await git_fn("add", f, cwd=str(config.MAIN_WORKTREE))
            rc, out = await git_fn(
                "commit", "-m", f"reciprocal edges: {reciprocals_added} edges from {len(new_claims)} new claims",
                cwd=str(config.MAIN_WORKTREE),
            )
            if rc == 0:
                # Push immediately — batch-extract-50.sh does reset --hard origin/main
                # every 15 min, which destroys unpushed local commits
                push_rc, push_out = await git_fn(
                    "push", "origin", "main",
                    cwd=str(config.MAIN_WORKTREE),
                    timeout=30,
                )
                if push_rc == 0:
                    logger.info("reciprocal_edges: %d edges pushed to main (%d new claims)", reciprocals_added, len(new_claims))
                else:
                    logger.warning("reciprocal_edges: push failed (commit is local only): %s", push_out[:200])
            else:
                logger.warning("reciprocal_edges: commit failed: %s", out[:200])

    except Exception:
        logger.exception("reciprocal_edges: failed (non-fatal)")
|
||||
|
||||
|
||||
async def backlink_source_claims(main_sha: str, branch_sha: str, git_fn: Callable):
    """After merge, update source files with claims_extracted backlinks.

    Reads sourced_from from merged claim frontmatter, finds the source file
    under inbox/archive/, and appends the claim filename (without ``.md``) to
    its claims_extracted list. Only runs for newly added claims (diff-filter=A).

    Edits are accumulated in memory per source, so several claims extracted
    from the same source are all recorded. They are then written, committed
    and pushed while holding the main-worktree lock. Failures are logged and
    never raised.

    Args:
        main_sha: Commit of main before the merge.
        branch_sha: Tip commit of the merged branch.
        git_fn: Awaitable git runner returning ``(rc, output)``.
    """
    try:
        rc, diff_out = await git_fn(
            "diff", "--name-only", "--diff-filter=A",
            main_sha, branch_sha,
            cwd=str(config.MAIN_WORKTREE),
            timeout=10,
        )
        if rc != 0:
            logger.warning("backlink_source_claims: diff failed (rc=%d), skipping", rc)
            return

        claim_dirs = ("domains/", "core/", "foundations/")
        new_claims = [
            f for f in diff_out.strip().split("\n")
            if f.endswith(".md")
            and f.startswith(claim_dirs)
            and not f.split("/")[-1].startswith("_")
            and "/entities/" not in f
            and "/decisions/" not in f
        ]

        if not new_claims:
            return

        # source path -> pending new file content
        modified_sources = {}
        for claim_path in new_claims:
            full_path = config.MAIN_WORKTREE / claim_path
            if not full_path.exists():
                continue

            try:
                content = full_path.read_text()
            except Exception:
                continue

            fm, _raw_fm, _body = parse_yaml_frontmatter(content)
            if fm is None:
                continue

            sourced_from = fm.get("sourced_from", "")
            # A list/dict here would make the path join below raise and abort
            # the whole pass; skip anything that is not a non-empty string.
            if not isinstance(sourced_from, str) or not sourced_from:
                continue

            source_path = config.MAIN_WORKTREE / "inbox" / "archive" / sourced_from
            if not source_path.exists():
                logger.debug("backlink_source_claims: source %s not found at %s", sourced_from, source_path)
                continue

            # Strip only the trailing ".md" (guaranteed by the filter above).
            claim_filename = claim_path.rsplit("/", 1)[-1][: -len(".md")]

            source_key = str(source_path)
            if source_key in modified_sources:
                # Build on the edit already queued for this source; re-reading
                # the file from disk would discard the earlier claim.
                source_content = modified_sources[source_key]
            else:
                try:
                    source_content = source_path.read_text()
                except Exception:
                    continue

            source_fm, _source_raw_fm, _source_body = parse_yaml_frontmatter(source_content)
            if source_fm is None:
                continue

            existing_claims = source_fm.get("claims_extracted", [])
            if isinstance(existing_claims, str):
                existing_claims = [existing_claims]
            if not isinstance(existing_claims, list):
                existing_claims = []

            if claim_filename in existing_claims:
                continue

            existing_claims.append(claim_filename)
            new_block = "claims_extracted:\n" + "\n".join(f"- {c}" for c in existing_claims)

            lines = source_content.split("\n")
            if "claims_extracted:" not in source_content:
                # No list yet: insert it just before the closing "---" fence.
                end_idx = None
                for i, line in enumerate(lines):
                    if i > 0 and line.strip() == "---":
                        end_idx = i
                        break
                if end_idx is None:
                    continue
                lines.insert(end_idx, new_block)
            else:
                # Replace the existing key and the list items that follow it.
                start_idx = None
                end_idx = None
                for i, line in enumerate(lines):
                    if start_idx is None:
                        if line.startswith("claims_extracted:"):
                            start_idx = i
                    elif not line.lstrip().startswith("- "):
                        # First non-item line ends the list. Indented items
                        # ("  - x") count as items so they are not orphaned.
                        end_idx = i
                        break
                if start_idx is None:
                    continue
                if end_idx is None:
                    end_idx = len(lines)
                lines[start_idx:end_idx] = new_block.split("\n")

            modified_sources[source_key] = "\n".join(lines)
            logger.info("backlink_source_claims: added %s to %s", claim_filename, sourced_from)

        if modified_sources:
            async with async_main_worktree_lock():
                for sp, content in modified_sources.items():
                    Path(sp).write_text(content)
                    await git_fn("add", sp, cwd=str(config.MAIN_WORKTREE))
                rc, out = await git_fn(
                    "commit", "-m", f"backlink: update claims_extracted on {len(modified_sources)} source(s)",
                    cwd=str(config.MAIN_WORKTREE),
                    timeout=15,
                )
                if rc == 0:
                    push_rc, push_out = await git_fn(
                        "push", "origin", "main",
                        cwd=str(config.MAIN_WORKTREE),
                        timeout=30,
                    )
                    if push_rc == 0:
                        logger.info("backlink_source_claims: %d source(s) updated and pushed", len(modified_sources))
                    else:
                        logger.warning("backlink_source_claims: push failed: %s", push_out[:200])
                else:
                    logger.warning("backlink_source_claims: commit failed: %s", out[:200])

    except Exception:
        logger.exception("backlink_source_claims: failed (non-fatal)")
|
||||
|
||||
|
||||
def archive_source_for_pr(branch: str, domain: str, merged: bool = True):
    """Archive the queued source file belonging to an ``extract/`` PR branch.

    Branches that do not start with ``extract/`` are ignored. The source slug
    is the branch name without that prefix, and the file is expected at
    ``inbox/queue/<slug>.md`` inside the main worktree.

    Args:
        branch: PR branch name.
        domain: Archive sub-directory; a falsy value is filed under "unknown".
        merged: True marks the source 'processed', False marks it 'rejected'.

    Every file-system change is recorded in ``_pending_source_moves`` so it
    can be committed later in one batch.
    """
    prefix = "extract/"
    if not branch.startswith(prefix):
        return

    source_slug = branch[len(prefix):]
    main_dir = getattr(config, "MAIN_WORKTREE", "/opt/teleo-eval/workspaces/main")
    filename = f"{source_slug}.md"
    queue_path = os.path.join(main_dir, "inbox", "queue", filename)
    archive_dir = os.path.join(main_dir, "inbox", "archive", domain or "unknown")
    archive_path = os.path.join(archive_dir, filename)

    # An archived copy already exists: the queue copy (if any) is a duplicate.
    if os.path.exists(archive_path):
        if not os.path.exists(queue_path):
            return
        try:
            os.remove(queue_path)
            _pending_source_moves.append((queue_path, "deleted"))
            logger.info("Source dedup: deleted queue/%s (already in archive/%s)", source_slug, domain)
        except Exception as e:
            logger.warning("Source dedup failed: %s", e)
        return

    # Nothing queued under this slug, nothing to archive.
    if not os.path.exists(queue_path):
        return

    # Stamp the final status into the frontmatter before the file leaves the queue.
    status = "processed" if merged else "rejected"
    update_source_frontmatter_status(queue_path, status)
    os.makedirs(archive_dir, exist_ok=True)
    try:
        shutil.move(queue_path, archive_path)
        _pending_source_moves.append((queue_path, archive_path))
        logger.info("Source archived: queue/%s → archive/%s/ (status=%s)",
                    source_slug, domain, status)
    except Exception as e:
        logger.warning("Source archive failed: %s", e)
|
||||
|
||||
|
||||
async def commit_source_moves(git_fn: Callable):
    """Batch-commit the accumulated source moves; called at the end of a merge cycle.

    Under the main-worktree lock this fetches and hard-resets to
    ``origin/main``, stages everything under ``inbox/``, commits, and then
    tries up to three times to ``pull --rebase`` and push. If every push
    attempt fails, the worktree is reset back to ``origin/main``.

    The pending list is snapshotted and cleared only after the lock has been
    acquired. If the lock times out, the recorded moves stay queued and are
    retried on the next cycle instead of being silently forgotten.

    Args:
        git_fn: Awaitable git runner; called as ``git_fn(*args, cwd=..., timeout=...)``
            and expected to return ``(returncode, output)``.
    """
    if not _pending_source_moves:
        return

    main_dir = config.MAIN_WORKTREE if hasattr(config, "MAIN_WORKTREE") else "/opt/teleo-eval/workspaces/main"

    # Acquire file lock — coordinates with telegram bot and other daemon stages (Ganymede: Option C)
    try:
        async with async_main_worktree_lock(timeout=10):
            # Another task may have drained the list while we waited for the lock.
            count = len(_pending_source_moves)
            if count == 0:
                return
            _pending_source_moves.clear()

            # Sync worktree with remote (Rhea: fetch+reset, not pull)
            # NOTE(review): a hard reset restores tracked files that were moved out of
            # queue/ while leaving the new untracked archive copies — confirm this
            # interaction with the already-moved files is intended.
            await git_fn("fetch", "origin", "main", cwd=main_dir, timeout=30)
            await git_fn("reset", "--hard", "origin/main", cwd=main_dir, timeout=30)

            await git_fn("add", "-A", "inbox/", cwd=main_dir)

            rc, out = await git_fn(
                "commit", "-m",
                f"pipeline: archive {count} source(s) post-merge\n\n"
                "Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>",
                cwd=main_dir,
            )
            if rc != 0:
                # An empty commit is not an error worth logging.
                if "nothing to commit" in out:
                    return
                logger.warning("Source archive commit failed: %s", out)
                return

            for attempt in range(3):
                await git_fn("pull", "--rebase", "origin", "main", cwd=main_dir, timeout=30)
                rc_push, _ = await git_fn("push", "origin", "main", cwd=main_dir, timeout=30)
                if rc_push == 0:
                    logger.info("Committed + pushed %d source archive moves", count)
                    return
                await asyncio.sleep(2)

            logger.warning("Failed to push source archive moves after 3 attempts")
            # Drop the unpushed local commit so the worktree matches the remote again.
            await git_fn("reset", "--hard", "origin/main", cwd=main_dir)
    except TimeoutError:
        logger.warning("Source archive commit skipped: worktree lock timeout")
|
||||
241
lib/pr_state.py
Normal file
241
lib/pr_state.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
"""PR state transitions — single source of truth for all status changes.
|
||||
|
||||
Every UPDATE prs SET status = ... MUST go through this module.
|
||||
|
||||
Invariants enforced:
|
||||
- close: always syncs Forgejo (opt-out for reconciliation only)
|
||||
- approve: requires non-empty domain (ValueError)
|
||||
- merged: always sets merged_at, clears last_error
|
||||
- conflict: always increments merge_failures, sets merge_cycled
|
||||
|
||||
Why this exists: 36 hand-crafted status transitions across evaluate.py
|
||||
and merge.py produced 3 incidents (domain NULL, Forgejo ghost PRs,
|
||||
merge_cycled missing). Centralizing eliminates the entire class of
|
||||
"forgot to update X in this one code path" bugs.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .forgejo import api as forgejo_api, repo_path
|
||||
|
||||
logger = logging.getLogger("pipeline.pr_state")
|
||||
|
||||
|
||||
async def close_pr(
    conn,
    pr_number: int,
    *,
    last_error: str = None,
    merge_cycled: bool = False,
    inc_merge_failures: bool = False,
    close_on_forgejo: bool = True,
) -> bool:
    """Close a PR on Forgejo and then in the database.

    Args:
        conn: DB connection exposing ``execute``; no commit is issued here.
        pr_number: PR number (``prs.number``).
        last_error: Stored in ``last_error`` when not None.
        merge_cycled: When True, sets ``merge_cycled = 1``.
        inc_merge_failures: When True, increments ``merge_failures``.
        close_on_forgejo: Pass False only when the PR is already closed on
            Forgejo (reconciliation, ghost PR cleanup after manual close).

    Returns:
        True once the DB row is updated; False when the Forgejo call returned
        None, in which case the DB is left untouched so it never claims
        "closed" while Forgejo still shows the PR open.
    """
    if close_on_forgejo:
        response = await forgejo_api("PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"})
        if response is None:
            logger.error("close_pr: Forgejo API failed for PR #%d, skipping DB update", pr_number)
            return False

    # Each entry pairs a SET fragment with the values bound to its placeholders.
    updates = [("status = 'closed'", ())]
    if last_error is not None:
        updates.append(("last_error = ?", (last_error,)))
    if merge_cycled:
        updates.append(("merge_cycled = 1", ()))
    if inc_merge_failures:
        updates.append(("merge_failures = COALESCE(merge_failures, 0) + 1", ()))

    set_clause = ", ".join(fragment for fragment, _ in updates)
    bound = [value for _, values in updates for value in values]
    bound.append(pr_number)
    conn.execute(f"UPDATE prs SET {set_clause} WHERE number = ?", bound)
    return True
|
||||
|
||||
|
||||
def approve_pr(
    conn,
    pr_number: int,
    *,
    domain: str,
    auto_merge: int = 0,
    leo_verdict: str = None,
    domain_verdict: str = None,
):
    """Mark a PR as approved.

    The given domain is only written when the row has no domain yet
    (``COALESCE(domain, ?)``). ``auto_merge`` is always written; the two
    verdict columns are written only when a value is supplied.

    Raises:
        ValueError: if ``domain`` is empty or None.
    """
    if not domain:
        raise ValueError(f"Cannot approve PR #{pr_number} without domain")

    assignments = ["status = 'approved'", "domain = COALESCE(domain, ?)", "auto_merge = ?"]
    bound = [domain, auto_merge]

    # Optional columns, in the order they must appear in the statement.
    optional_columns = {"leo_verdict": leo_verdict, "domain_verdict": domain_verdict}
    for column, value in optional_columns.items():
        if value is not None:
            assignments.append(f"{column} = ?")
            bound.append(value)

    bound.append(pr_number)
    conn.execute(f"UPDATE prs SET {', '.join(assignments)} WHERE number = ?", bound)
|
||||
|
||||
|
||||
def mark_merged(conn, pr_number: int):
    """Flag a PR as merged, stamping ``merged_at`` with the DB's current time
    and clearing any previous ``last_error``."""
    statement = (
        "UPDATE prs SET status = 'merged', merged_at = datetime('now'), "
        "last_error = NULL WHERE number = ?"
    )
    conn.execute(statement, (pr_number,))
|
||||
|
||||
|
||||
def mark_conflict(conn, pr_number: int, *, last_error: str = None):
    """Put a PR into the 'conflict' state.

    Every call sets ``merge_cycled``, bumps ``merge_failures`` by one (a NULL
    counter counts as 0) and overwrites ``last_error`` with the given value,
    including None.
    """
    statement = (
        "UPDATE prs SET status = 'conflict', merge_cycled = 1, "
        "merge_failures = COALESCE(merge_failures, 0) + 1, "
        "last_error = ? WHERE number = ?"
    )
    bound = (last_error, pr_number)
    conn.execute(statement, bound)
|
||||
|
||||
|
||||
def mark_conflict_permanent(
    conn,
    pr_number: int,
    *,
    last_error: str = None,
    conflict_rebase_attempts: int = None,
):
    """Move a PR to 'conflict_permanent' (no further retries).

    ``last_error`` and ``conflict_rebase_attempts`` are written only when a
    value is passed; otherwise the stored values are kept.
    """
    optional_columns = (
        ("last_error", last_error),
        ("conflict_rebase_attempts", conflict_rebase_attempts),
    )
    supplied = [(column, value) for column, value in optional_columns if value is not None]

    assignments = ["status = 'conflict_permanent'"] + [f"{column} = ?" for column, _ in supplied]
    bound = [value for _, value in supplied] + [pr_number]
    conn.execute(f"UPDATE prs SET {', '.join(assignments)} WHERE number = ?", bound)
|
||||
|
||||
|
||||
def reopen_pr(
    conn,
    pr_number: int,
    *,
    leo_verdict: str = None,
    domain_verdict: str = None,
    last_error: str = None,
    eval_issues: str = None,
    dec_eval_attempts: bool = False,
    reset_for_reeval: bool = False,
    conflict_rebase_attempts: int = None,
):
    """Set PR back to open.

    Covers all reopen scenarios:
    - Transient failure (API error): no extra args
    - Rejection: leo_verdict + last_error + eval_issues
    - Batch overflow: dec_eval_attempts=True
    - Conflict resolved: reset_for_reeval=True

    Args:
        conn: DB connection exposing ``execute``; no commit is issued here.
        pr_number: PR number (``prs.number``).
        leo_verdict: Written when not None (ignored if ``reset_for_reeval``).
        domain_verdict: Written when not None (ignored if ``reset_for_reeval``).
        last_error: Written when not None.
        eval_issues: Written when not None.
        dec_eval_attempts: Give back one evaluation attempt. The counter is
            clamped at 0 so it can never go negative.
        reset_for_reeval: Force both verdicts to 'pending' and
            ``eval_attempts`` to 0.
        conflict_rebase_attempts: Written when not None.
    """
    parts = ["status = 'open'"]
    params = []

    if reset_for_reeval:
        # A full reset overrides any verdicts passed by the caller.
        parts.extend([
            "leo_verdict = 'pending'",
            "domain_verdict = 'pending'",
            "eval_attempts = 0",
        ])
    else:
        if leo_verdict is not None:
            parts.append("leo_verdict = ?")
            params.append(leo_verdict)
        if domain_verdict is not None:
            parts.append("domain_verdict = ?")
            params.append(domain_verdict)

    if last_error is not None:
        parts.append("last_error = ?")
        params.append(last_error)

    if eval_issues is not None:
        parts.append("eval_issues = ?")
        params.append(eval_issues)

    if dec_eval_attempts:
        # Two-argument MAX() is SQLite's scalar max; it keeps a counter that is
        # already 0 from dropping to -1.
        parts.append("eval_attempts = MAX(COALESCE(eval_attempts, 1) - 1, 0)")

    if conflict_rebase_attempts is not None:
        parts.append("conflict_rebase_attempts = ?")
        params.append(conflict_rebase_attempts)

    params.append(pr_number)
    conn.execute(f"UPDATE prs SET {', '.join(parts)} WHERE number = ?", params)
|
||||
|
||||
|
||||
def start_fixing(conn, pr_number: int) -> bool:
    """Try to claim a PR for fixing by moving it from 'open' to 'fixing'.

    The same statement also increments ``fix_attempts`` (NULL counts as 0) and
    stamps ``last_attempt`` with the DB's current time.

    Returns:
        True when a row was updated, i.e. the PR was still 'open';
        False otherwise.
    """
    claim_sql = (
        "UPDATE prs SET status = 'fixing', "
        "fix_attempts = COALESCE(fix_attempts, 0) + 1, "
        "last_attempt = datetime('now') "
        "WHERE number = ? AND status = 'open'"
    )
    claimed_rows = conn.execute(claim_sql, (pr_number,)).rowcount
    return claimed_rows > 0
|
||||
|
||||
|
||||
def reset_for_reeval(conn, pr_number: int):
    """Return a PR to 'open' with all evaluation state wiped.

    Zeroes ``eval_attempts``, sets ``eval_issues`` to an empty JSON list,
    clears ``tier0_pass`` and ``last_error``, and puts both verdicts back to
    'pending' so the PR runs through the whole evaluation cycle again.
    """
    wipe_eval_state = """UPDATE prs SET
        status = 'open',
        eval_attempts = 0,
        eval_issues = '[]',
        tier0_pass = NULL,
        domain_verdict = 'pending',
        leo_verdict = 'pending',
        last_error = NULL
        WHERE number = ?"""
    conn.execute(wipe_eval_state, (pr_number,))
|
||||
|
||||
|
||||
def start_review(conn, pr_number: int) -> bool:
    """Try to claim a PR for review by moving it from 'open' to 'reviewing'.

    Returns:
        True when a row was updated, i.e. the PR was still 'open';
        False when nothing matched (e.g. another worker already claimed it).
    """
    claim_sql = "UPDATE prs SET status = 'reviewing' WHERE number = ? AND status = 'open'"
    claimed_rows = conn.execute(claim_sql, (pr_number,)).rowcount
    return claimed_rows > 0
|
||||
221
lib/pre_screen.py
Normal file
221
lib/pre_screen.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
"""Pre-screening: identify themes from source, fetch prior art from Qdrant.
|
||||
|
||||
Runs before extraction to show the extractor what the KB already knows.
|
||||
Reduces near-duplicates (our #1 rejection cause) by turning semantic
|
||||
pre-screening from a manual discipline into a pipeline feature.
|
||||
|
||||
Design: Leo (approved 2026-03-30). Owner: Epimetheus.
|
||||
|
||||
Flow:
|
||||
1. Haiku identifies 3-5 themes from source text
|
||||
2. Each theme + title (with author-stripped variant) → Tier 1 search
|
||||
3. Results injected into extraction prompt as "Prior Art"
|
||||
4. Extractor classifies extractions as NEW / ENRICHMENT / CHALLENGE
|
||||
5. ENRICHMENT/CHALLENGE must cite specific target claim (hard gate)
|
||||
|
||||
Cost: ~$0.002/source (Haiku theme pass) + free Qdrant queries.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
# Search library (same Tier 1 path used by Argus + Telegram bot)
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from lib.search import search
|
||||
|
||||
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
THEME_MODEL = "anthropic/claude-haiku-4.5"

# Prior art threshold — only show results above this score to the extractor.
# 0.50 catches mechanism-level matches where compound themes dilute embeddings.
# Was 0.65 but Haiku compound themes score 0.50-0.60 even on exact matches.
# False positives cost nothing (extractor sees irrelevant prior art, ignores it).
# False negatives cost wasted extraction + review + rejection.
PRIOR_ART_THRESHOLD = 0.50

# Regex to strip leading author/entity patterns from titles
# e.g. "Shapiro: How Far Will AI Video Go" → "How Far Will AI Video Go"
#      "Aschenbrenner — Situational Awareness" → "Situational Awareness"
# A colon or en/em dash always counts as the separator. A plain ASCII hyphen
# only counts when preceded by whitespace ("Shapiro - Title"), so a hyphenated
# leading word such as "Self-driving cars ..." is not mistaken for an author.
AUTHOR_PREFIX_RE = re.compile(
    r"^[A-Za-z\-']+(?:\s+[A-Za-z\-']+)?(?:\s*[:–—]|\s+-)\s*", re.UNICODE
)
|
||||
|
||||
|
||||
def identify_themes(source_content: str, api_key: str, source_title: str = "") -> list[str]:
    """Ask the theme model for the main themes of a source, as search queries.

    Only the first 3000 characters of the source are sent. The reply must be
    a JSON array of strings (optionally wrapped in a markdown code fence);
    at most five entries are returned.

    Returns:
        The theme strings, or ``[source_title]`` (``[]`` when the title is
        empty) if the request fails or the reply is not a list of strings.
    """
    # Truncate source to keep model costs minimal
    snippet = source_content[:3000]

    prompt = f"""Identify the 3-5 major themes or topics in this text.
Return ONLY a JSON array of short search queries (3-8 words each).
Keep queries SHORT — 3-5 words is ideal. Compound phrases score poorly in vector search.

Example good output: ["futarchy governance", "semaglutide kidney outcomes", "ICO oversubscription"]
Example bad output: ["futarchy governance mechanisms detecting revenue misrepresentation token launches", "prediction market accuracy identifying fraudulent financial claims"]

Text:
{snippet}

Return JSON array only, no explanation."""

    try:
        resp = requests.post(
            OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://livingip.xyz",
                "X-Title": "Teleo Pre-Screen",
            },
            json={
                "model": THEME_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "max_tokens": 500,
            },
            timeout=30,
        )
        resp.raise_for_status()
        reply = resp.json()["choices"][0]["message"]["content"].strip()

        # Strip markdown fencing if present
        if reply.startswith("```"):
            reply = re.sub(r"^```(?:json)?\s*\n?", "", reply)
            reply = re.sub(r"\n?```\s*$", "", reply)

        parsed = json.loads(reply)
        is_string_list = isinstance(parsed, list) and all(isinstance(t, str) for t in parsed)
        if is_string_list:
            return parsed[:5]
    except Exception as e:
        print(f" WARN: Theme identification failed: {e}", file=sys.stderr)

    # Fallback: use title as the only theme
    if source_title:
        return [source_title]
    return []
|
||||
|
||||
|
||||
def _strip_author(title: str) -> str:
    """Return ``title`` without its leading author/entity prefix.

    "Shapiro: How Far Will AI Video Go" → "How Far Will AI Video Go"
    "Noah Smith — AI and Jobs" → "AI and Jobs"

    Returns "" when nothing was stripped or when what remains is 10
    characters or shorter, so callers can skip the variant.
    """
    remainder = AUTHOR_PREFIX_RE.sub("", title).strip()
    # Only worth using when it is meaningfully different from the full title.
    worth_using = bool(remainder) and len(remainder) > 10 and remainder != title
    return remainder if worth_using else ""
|
||||
|
||||
|
||||
def _extract_title_from_source(source_content: str, source_file: str) -> str:
|
||||
"""Get a usable title from source frontmatter or filename."""
|
||||
# Try frontmatter title
|
||||
match = re.search(r"^title:\s*[\"']?(.+?)[\"']?\s*$", source_content, re.MULTILINE)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
# Fall back to filename
|
||||
basename = os.path.basename(source_file).replace(".md", "")
|
||||
# Strip date prefix (e.g., "2026-03-15-article-name" → "article-name")
|
||||
basename = re.sub(r"^\d{4}-\d{2}-\d{2}-", "", basename)
|
||||
return basename.replace("-", " ")
|
||||
|
||||
|
||||
def pre_screen(source_content: str, source_file: str, api_key: str,
               domain: str | None = None) -> dict:
    """Run full pre-screening: themes → search → prior art.

    Args:
        source_content: Full text of the source.
        source_file: Path of the source file (used for the title fallback).
        api_key: OpenRouter key for the theme pass.
        domain: Currently unused; searches are deliberately cross-domain.

    Returns:
        {
            "themes": ["theme1", "theme2", ...],
            "prior_art": [
                {"claim_path": str, "title": str, "score": float, "query": str},
                ...
            ],
            "search_queries": ["query1", "query2", ...],  # for audit trail
        }

    Each claim appears once in ``prior_art``, carrying its highest score
    across all queries and the query that produced that score.
    """
    title = _extract_title_from_source(source_content, source_file)

    # Step 1: Identify themes
    themes = identify_themes(source_content, api_key, source_title=title)

    # Step 2: Build search queries (themes + title + author-stripped title)
    queries = list(themes)
    if title and title not in queries:
        queries.append(title)
    stripped = _strip_author(title)
    if stripped and stripped not in queries:
        queries.append(stripped)

    # Step 3: Search Qdrant for each query (Tier 1: expand=False)
    # Keyed by claim path. A later query replaces an earlier entry only when it
    # scores higher, so a weak first match cannot hide a strong later one from
    # the threshold filter below.
    best_by_path: dict[str, dict] = {}

    for query in queries:
        try:
            results = search(query, expand=False, domain=None)  # cross-domain on purpose
            for hit in results.get("direct_results", []):
                path = hit.get("claim_path", "")
                if not path:
                    continue
                score = round(hit.get("score", 0), 3)
                known = best_by_path.get(path)
                if known is not None and known["score"] >= score:
                    continue
                # Accept either key for the display title; presumably search
                # hits carry "claim_title" — verify against search().
                hit_title = (
                    hit.get("title")
                    or hit.get("claim_title")
                    or os.path.basename(path).replace(".md", "").replace("-", " ")
                )
                best_by_path[path] = {
                    "claim_path": path,
                    "title": hit_title,
                    "score": score,
                    "query": query,
                }
        except Exception as e:
            print(f" WARN: Pre-screen search failed for '{query[:50]}': {e}", file=sys.stderr)

    # Filter below threshold, sort by score descending, cap at 25
    prior_art = [p for p in best_by_path.values() if p["score"] >= PRIOR_ART_THRESHOLD]
    prior_art.sort(key=lambda x: x["score"], reverse=True)
    prior_art = prior_art[:25]

    return {
        "themes": themes,
        "prior_art": prior_art,
        "search_queries": queries,
    }
|
||||
|
||||
|
||||
def format_prior_art_for_prompt(prior_art: list[dict]) -> str:
    """Render prior-art hits as a bullet list for the extraction prompt.

    One line per hit, in Leo's required format:
    - [claim-slug](path) — similarity: 0.82 — query: "theme that matched"

    The query is cut to 60 characters. An empty list yields a fixed
    "nothing similar found" sentence instead.
    """
    if not prior_art:
        return "No similar claims found in the KB. This source likely covers novel territory."

    bullets = []
    for entry in prior_art:
        path = entry["claim_path"]
        slug = os.path.basename(path).replace(".md", "")
        score = entry["score"]
        matched = entry["query"][:60]
        bullets.append(f"- [{slug}]({path}) — similarity: {score:.2f} — query: \"{matched}\"")
    return "\n".join(bullets)
|
||||
|
||||
|
||||
def format_prior_art_for_pr(prior_art: list[dict]) -> str:
    """Render prior-art hits as a markdown section for the PR body.

    Output is a "## Prior Art" heading followed by one bullet per hit showing
    the similarity score and the matching query (cut to 80 characters), ending
    with a newline. An empty list yields a single "no prior art" line.
    """
    if not prior_art:
        return "No prior art found — source covers novel territory.\n"

    section = ["## Prior Art (automated pre-screening)\n"]
    for entry in prior_art:
        path = entry["claim_path"]
        slug = os.path.basename(path).replace(".md", "")
        score = entry["score"]
        matched = entry["query"][:80]
        section.append(f"- [{slug}]({path}) — similarity: {score:.2f} — matched query: \"{matched}\"")
    # The empty last element makes the joined text end with a newline.
    section.append("")
    return "\n".join(section)
|
||||
480
lib/search.py
Normal file
480
lib/search.py
Normal file
|
|
@ -0,0 +1,480 @@
|
|||
"""Shared Qdrant vector search library for the Teleo knowledge base.
|
||||
|
||||
Provides embed + search + graph expansion as a reusable library.
|
||||
Any consumer (Argus dashboard, Telegram bot, agent research) imports from here.
|
||||
|
||||
Layer 1: Qdrant vector search (semantic similarity)
|
||||
Layer 2: Graph expansion (1-hop via frontmatter edges)
|
||||
Layer 3: Left to the caller (agent context, domain filtering)
|
||||
|
||||
Owner: Epimetheus
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import urllib.request
|
||||
|
||||
from . import config
|
||||
|
||||
logger = logging.getLogger("pipeline.search")
|
||||
|
||||
# --- Config (all from environment or config.py defaults) ---
# Base URL of the Qdrant HTTP API; override with the QDRANT_URL env var.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
# Collection queried by search_qdrant; override with QDRANT_COLLECTION.
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "teleo-claims")
# Sent to the embeddings endpoint as "openai/<model>" (see embed_query).
EMBEDDING_MODEL = "text-embedding-3-small"

# Process-wide cache for the OpenRouter key, filled lazily by _get_api_key().
_OPENROUTER_KEY: str | None = None

# Matches [[wiki link]] references; group 1 is the text between the brackets.
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")

# Structural files that should never be included in graph expansion results.
# These are indexes/MOCs, not claims — expanding them pulls entire domains.
STRUCTURAL_FILES = {"_map.md", "_overview.md"}
|
||||
|
||||
|
||||
def _get_api_key() -> str | None:
    """Return the OpenRouter API key, reading it at most once per successful lookup.

    The key is taken from ``<SECRETS_DIR>/openrouter-key`` when that file
    exists, otherwise from the ``OPENROUTER_API_KEY`` environment variable.
    A non-empty result is cached in the module-level ``_OPENROUTER_KEY``.
    """
    global _OPENROUTER_KEY
    if not _OPENROUTER_KEY:
        secret_file = config.SECRETS_DIR / "openrouter-key"
        if secret_file.exists():
            _OPENROUTER_KEY = secret_file.read_text().strip()
        else:
            _OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY")
    return _OPENROUTER_KEY
|
||||
|
||||
|
||||
# --- Layer 1: Vector search ---
|
||||
|
||||
|
||||
def embed_query(text: str) -> list[float] | None:
    """Embed a query string through OpenRouter's OpenAI-compatible endpoint.

    Only the first 8000 characters of ``text`` are sent.

    Returns:
        The embedding vector (documented upstream as 1536-dimensional), or
        None when no API key is available or the request/parsing fails.
    """
    api_key = _get_api_key()
    if not api_key:
        logger.error("No OpenRouter API key available for embedding")
        return None

    request_body = {
        "model": f"openai/{EMBEDDING_MODEL}",
        "input": text[:8000],
    }
    request = urllib.request.Request(
        "https://openrouter.ai/api/v1/embeddings",
        data=json.dumps(request_body).encode(),
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            parsed = json.loads(response.read())
        return parsed["data"][0]["embedding"]
    except Exception as e:
        logger.error("Embedding failed: %s", e)
        return None
|
||||
|
||||
|
||||
def search_qdrant(vector: list[float], limit: int = 10,
                  domain: str | None = None, confidence: str | None = None,
                  exclude: list[str] | None = None,
                  score_threshold: float = 0.3,
                  offset: int = 0) -> list[dict]:
    """Query the Qdrant collection for the claims nearest to ``vector``.

    Args:
        vector: Query embedding.
        limit: Maximum number of hits requested.
        domain: When set, only claims whose ``domain`` payload matches.
        confidence: When set, only claims whose ``confidence`` payload matches.
        exclude: Claim paths that must not be returned.
        score_threshold: Minimum score passed through to Qdrant.
        offset: Skip first N results (Qdrant native offset for pagination).

    Returns:
        The ``result`` list of the Qdrant response:
        [{id, score, payload: {claim_path, claim_title, ...}}].
        An empty list when the request fails.
    """
    must = [
        {"key": field, "match": {"value": wanted}}
        for field, wanted in (("domain", domain), ("confidence", confidence))
        if wanted
    ]
    must_not = [
        {"key": "claim_path", "match": {"value": excluded_path}}
        for excluded_path in (exclude or [])
    ]

    body = {
        "vector": vector,
        "limit": limit,
        "with_payload": True,
        "score_threshold": score_threshold,
    }
    if offset > 0:
        body["offset"] = offset
    # The filter object is only attached when at least one clause exists.
    if must or must_not:
        conditions = {}
        if must:
            conditions["must"] = must
        if must_not:
            conditions["must_not"] = must_not
        body["filter"] = conditions

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/search",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            parsed = json.loads(response.read())
        return parsed.get("result", [])
    except Exception as e:
        logger.error("Qdrant search failed: %s", e)
        return []
|
||||
|
||||
|
||||
# --- Layer 2: Graph expansion ---
|
||||
|
||||
|
||||
def _parse_frontmatter_edges(path: Path) -> dict:
|
||||
"""Extract relationship edges from a claim's frontmatter.
|
||||
|
||||
Handles both YAML formats:
|
||||
depends_on: ["item1", "item2"] (inline list)
|
||||
depends_on: (multi-line list)
|
||||
- item1
|
||||
- item2
|
||||
|
||||
Returns {supports: [...], challenges: [...], depends_on: [...], related: [...], wiki_links: [...]}.
|
||||
wiki_links are separated from explicit related edges for differential weighting.
|
||||
"""
|
||||
edges = {"supports": [], "challenges": [], "depends_on": [], "related": [], "wiki_links": []}
|
||||
try:
|
||||
text = path.read_text(errors="replace")
|
||||
except Exception:
|
||||
return edges
|
||||
|
||||
if not text.startswith("---"):
|
||||
return edges
|
||||
end = text.find("\n---", 3)
|
||||
if end == -1:
|
||||
return edges
|
||||
|
||||
fm_text = text[3:end]
|
||||
|
||||
# Use YAML parser for reliable edge extraction
|
||||
try:
|
||||
import yaml
|
||||
fm = yaml.safe_load(fm_text)
|
||||
if isinstance(fm, dict):
|
||||
for field in ("supports", "challenges", "depends_on", "related"):
|
||||
val = fm.get(field)
|
||||
if isinstance(val, list):
|
||||
edges[field] = [str(v).strip() for v in val if v]
|
||||
elif isinstance(val, str) and val.strip():
|
||||
edges[field] = [val.strip()]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract wiki links from body as separate edge type (lower weight)
|
||||
body = text[end + 4:]
|
||||
all_explicit = set()
|
||||
for field in ("supports", "challenges", "depends_on", "related"):
|
||||
all_explicit.update(edges[field])
|
||||
|
||||
wiki_links = WIKI_LINK_RE.findall(body)
|
||||
for link in wiki_links:
|
||||
link = link.strip()
|
||||
if link and link not in all_explicit and link not in edges["wiki_links"]:
|
||||
edges["wiki_links"].append(link)
|
||||
|
||||
return edges
|
||||
|
||||
|
||||
def _resolve_claim_path(name: str, repo_root: Path) -> Path | None:
|
||||
"""Resolve a claim name (from frontmatter edge or wiki link) to a file path.
|
||||
|
||||
Handles both naming conventions:
|
||||
- "GLP-1 receptor agonists are..." → "GLP-1 receptor agonists are....md" (spaces)
|
||||
- "glp-1-persistence-drops..." → "glp-1-persistence-drops....md" (slugified)
|
||||
|
||||
Checks domains/, core/, foundations/, decisions/ subdirectories.
|
||||
"""
|
||||
# Try exact name first (spaces in filename), then slugified
|
||||
candidates = [name]
|
||||
slug = name.lower().replace(" ", "-").replace("_", "-")
|
||||
if slug != name:
|
||||
candidates.append(slug)
|
||||
|
||||
for subdir in ["domains", "core", "foundations", "decisions"]:
|
||||
base = repo_root / subdir
|
||||
if not base.is_dir():
|
||||
continue
|
||||
for candidate_name in candidates:
|
||||
for md in base.rglob(f"{candidate_name}.md"):
|
||||
return md
|
||||
return None
|
||||
|
||||
|
||||
def graph_expand(seed_paths: list[str], repo_root: Path | None = None,
|
||||
max_expanded: int = 30,
|
||||
challenge_weight: float = 1.5,
|
||||
seen: set[str] | None = None) -> list[dict]:
|
||||
"""Layer 2: Expand seed claims 1-hop through knowledge graph edges.
|
||||
|
||||
Traverses supports/challenges/depends_on/related/wiki_links edges in frontmatter.
|
||||
Edge weights: challenges 1.5x, depends_on 1.25x, supports/related 1.0x, wiki_links 0.5x.
|
||||
Results sorted by weight descending so cap cuts low-value edges first.
|
||||
|
||||
Args:
|
||||
seen: Optional set of paths already matched (e.g. from keyword search) to exclude.
|
||||
|
||||
Returns list of {claim_path, claim_title, edge_type, edge_weight, from_claim}.
|
||||
Excludes claims already in seed_paths or seen set.
|
||||
"""
|
||||
EDGE_WEIGHTS = {
|
||||
"challenges": 1.5,
|
||||
"challenged_by": 1.5,
|
||||
"depends_on": 1.25,
|
||||
"supports": 1.0,
|
||||
"related": 1.0,
|
||||
"wiki_links": 0.5,
|
||||
}
|
||||
|
||||
root = repo_root or config.MAIN_WORKTREE
|
||||
all_expanded = []
|
||||
visited = set(seed_paths)
|
||||
if seen:
|
||||
visited.update(seen)
|
||||
|
||||
for seed_path in seed_paths:
|
||||
full_path = root / seed_path
|
||||
if not full_path.exists():
|
||||
continue
|
||||
|
||||
edges = _parse_frontmatter_edges(full_path)
|
||||
|
||||
for edge_type, targets in edges.items():
|
||||
weight = EDGE_WEIGHTS.get(edge_type, 1.0)
|
||||
|
||||
for target_name in targets:
|
||||
target_path = _resolve_claim_path(target_name, root)
|
||||
if target_path is None:
|
||||
continue
|
||||
|
||||
rel_path = str(target_path.relative_to(root))
|
||||
if rel_path in visited:
|
||||
continue
|
||||
# Skip structural files (MOCs/indexes) — they pull entire domains
|
||||
if target_path.name in STRUCTURAL_FILES:
|
||||
continue
|
||||
visited.add(rel_path)
|
||||
|
||||
# Read title from frontmatter
|
||||
title = target_name
|
||||
try:
|
||||
text = target_path.read_text(errors="replace")
|
||||
if text.startswith("---"):
|
||||
end = text.find("\n---", 3)
|
||||
if end > 0:
|
||||
import yaml
|
||||
fm = yaml.safe_load(text[3:end])
|
||||
if isinstance(fm, dict):
|
||||
title = fm.get("name", fm.get("title", target_name))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
all_expanded.append({
|
||||
"claim_path": rel_path,
|
||||
"claim_title": str(title),
|
||||
"edge_type": edge_type,
|
||||
"edge_weight": weight,
|
||||
"from_claim": seed_path,
|
||||
})
|
||||
|
||||
# Sort by weight descending so cap cuts lowest-value edges first
|
||||
all_expanded.sort(key=lambda x: x["edge_weight"], reverse=True)
|
||||
return all_expanded[:max_expanded]
|
||||
|
||||
|
||||
# --- Combined search (Layer 1 + Layer 2) ---
|
||||
|
||||
# Default thresholds — lowered Apr 5 after production audit showed 0 vector hits.
# text-embedding-3-small scores 0.50-0.60 on conceptual matches (e.g. "risks in
# investing" vs specific claims). 0.70 rejected every result. 0.50/0.40 lets
# relevant claims through while still filtering noise.
# NOTE(review): presumably PASS1_* applies to the default (expand=False) pass and
# PASS2_* to the expanded pass, with HARD_CAP bounding the combined result count —
# confirm against search().
PASS1_LIMIT = 5
PASS1_THRESHOLD = 0.50
PASS2_LIMIT = 5
PASS2_THRESHOLD = 0.40
HARD_CAP = 10
|
||||
|
||||
|
||||
def _dedup_hits(hits: list[dict], seen: set[str]) -> list[dict]:
|
||||
"""Filter Qdrant hits: dedup by claim_path, exclude structural files."""
|
||||
results = []
|
||||
for hit in hits:
|
||||
payload = hit.get("payload", {})
|
||||
claim_path = payload.get("claim_path", "")
|
||||
if claim_path in seen:
|
||||
continue
|
||||
if claim_path.split("/")[-1] in STRUCTURAL_FILES:
|
||||
continue
|
||||
seen.add(claim_path)
|
||||
results.append({
|
||||
"claim_title": payload.get("claim_title", ""),
|
||||
"claim_path": claim_path,
|
||||
"score": round(hit.get("score", 0), 4),
|
||||
"domain": payload.get("domain", ""),
|
||||
"confidence": payload.get("confidence", ""),
|
||||
"snippet": payload.get("snippet", "")[:200],
|
||||
"type": payload.get("type", "claim"),
|
||||
})
|
||||
return results
|
||||
|
||||
|
||||
def _sort_results(direct: list[dict], expanded: list[dict]) -> list[dict]:
|
||||
"""Sort combined results: similarity desc → challenged_by → other expansion.
|
||||
|
||||
Sort order is load-bearing: LLMs have primacy bias, so best claims first.
|
||||
"""
|
||||
# Direct results already sorted by Qdrant (cosine desc)
|
||||
sorted_direct = sorted(direct, key=lambda x: x.get("score", 0), reverse=True)
|
||||
|
||||
# Expansion: challenged_by first (counterpoints), then rest by weight
|
||||
challenged = [e for e in expanded if e.get("edge_type") == "challenges"]
|
||||
other_expanded = [e for e in expanded if e.get("edge_type") != "challenges"]
|
||||
challenged.sort(key=lambda x: x.get("edge_weight", 0), reverse=True)
|
||||
other_expanded.sort(key=lambda x: x.get("edge_weight", 0), reverse=True)
|
||||
|
||||
return sorted_direct + challenged + other_expanded
|
||||
|
||||
|
||||
def search(query: str, expand: bool = False,
           domain: str | None = None, confidence: str | None = None,
           exclude: list[str] | None = None) -> dict:
    """Two-pass semantic search: embed query, search Qdrant, optionally expand.

    Pass 1 (expand=False, default): up to PASS1_LIMIT claims from Qdrant with
    score >= PASS1_THRESHOLD. Sufficient for ~80% of queries. Fast and focused.

    Pass 2 (expand=True): up to PASS2_LIMIT further claims (offset=PASS1_LIMIT,
    score >= PASS2_THRESHOLD) plus graph-expanded claims reached from the
    direct hits. At most HARD_CAP results in total.
    Agent calls this only when pass 1 didn't answer the question.

    Args:
        query: Free-text query to embed.
        expand: Run pass 2 and graph expansion in addition to pass 1.
        domain: Optional domain filter forwarded to Qdrant.
        confidence: Optional confidence filter forwarded to Qdrant.
        exclude: Claim paths to leave out; forwarded to Qdrant and also
            pre-seeded into the dedup set.

    Returns {
        "query": str,
        "direct_results": [...],   # Layer 1 Qdrant hits (sorted by score desc)
        "expanded_results": [...], # Layer 2 graph expansion (challenges first)
        "total": int,
    }
    When the query cannot be embedded, both lists are empty, "total" is 0 and
    an extra "error": "embedding_failed" key is present.
    """
    vector = embed_query(query)
    if vector is None:
        return {"query": query, "direct_results": [], "expanded_results": [],
                "total": 0, "error": "embedding_failed"}

    # --- Pass 1: first PASS1_LIMIT hits, higher threshold ---
    hits = search_qdrant(vector, limit=PASS1_LIMIT, domain=domain,
                         confidence=confidence, exclude=exclude,
                         score_threshold=PASS1_THRESHOLD)

    seen_paths: set[str] = set()
    if exclude:
        seen_paths.update(exclude)
    direct = _dedup_hits(hits, seen_paths)

    expanded: list[dict] = []
    if expand:
        # --- Pass 2: next PASS2_LIMIT from Qdrant (lower threshold, offset) ---
        # NOTE(review): if search_qdrant's offset skips by rank, a hit inside
        # the first PASS1_LIMIT ranks that scores between PASS2_THRESHOLD and
        # PASS1_THRESHOLD is returned by neither pass — confirm this is intended.
        pass2_hits = search_qdrant(vector, limit=PASS2_LIMIT, domain=domain,
                                   confidence=confidence, exclude=exclude,
                                   score_threshold=PASS2_THRESHOLD,
                                   offset=PASS1_LIMIT)
        pass2_direct = _dedup_hits(pass2_hits, seen_paths)
        direct.extend(pass2_direct)

        # Graph expansion on all direct results (pass 1 + pass 2 seeds)
        seed_paths = [r["claim_path"] for r in direct]
        remaining_cap = HARD_CAP - len(direct)
        if remaining_cap > 0:
            expanded = graph_expand(seed_paths, max_expanded=remaining_cap,
                                    seen=seen_paths)

    # Enforce hard cap across all results
    all_sorted = _sort_results(direct, expanded)[:HARD_CAP]

    # Split back into direct vs expanded for backward compat
    direct_paths = {r["claim_path"] for r in direct}
    final_direct = [r for r in all_sorted if r.get("claim_path") in direct_paths]
    final_expanded = [r for r in all_sorted if r.get("claim_path") not in direct_paths]

    return {
        "query": query,
        "direct_results": final_direct,
        "expanded_results": final_expanded,
        "total": len(all_sorted),
    }
|
||||
|
||||
|
||||
# --- Duplicate detection ---
|
||||
|
||||
|
||||
def check_duplicate(text: str, threshold: float = 0.85,
                    domain: str | None = None) -> dict:
    """Report whether `text` nearly duplicates content already in the KB.

    The text is embedded and the three closest Qdrant hits (score >= 0.3) are
    returned with their scores. The first hit's score decides the verdict:
    >= threshold is "duplicate", >= 0.70 is "check_manually", otherwise "novel".

    Args:
        text: The claim text to check.
        threshold: Minimum score to flag as potential duplicate (default 0.85).
        domain: Optional domain filter.

    Returns:
        {
            "query": str,            # first 100 characters of text
            "is_duplicate": bool,    # True if the best match >= threshold
            "highest_score": float,  # best match score
            "verdict": str,          # "duplicate" | "check_manually" | "novel"
            "matches": [             # up to 3 matches
                {"score": float, "claim_path": str, "claim_title": str, "domain": str}
            ]
        }
        If embedding fails, verdict is "error", matches is empty and an
        "error": "embedding_failed" key is added.
    """
    vector = embed_query(text)
    if vector is None:
        return {"query": text[:100], "is_duplicate": False, "highest_score": 0,
                "verdict": "error", "matches": [], "error": "embedding_failed"}

    def _as_match(hit):
        payload = hit.get("payload", {})
        return {
            "score": round(hit.get("score", 0), 4),
            "claim_path": payload.get("claim_path", ""),
            "claim_title": payload.get("claim_title", ""),
            "domain": payload.get("domain", ""),
        }

    found = search_qdrant(vector, limit=3, domain=domain, score_threshold=0.3)
    matches = [_as_match(hit) for hit in found]

    # The first hit is taken as the best one.
    highest = matches[0]["score"] if matches else 0.0
    flagged = highest >= threshold

    if flagged:
        verdict = "duplicate"
    else:
        verdict = "check_manually" if highest >= 0.70 else "novel"

    return {
        "query": text[:100],
        "is_duplicate": flagged,
        "highest_score": highest,
        "verdict": verdict,
        "matches": matches,
    }
|
||||
86
lib/stale_pr.py
Normal file
86
lib/stale_pr.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
"""Stale extraction PR cleanup — closes extraction PRs that produce no claims.
|
||||
|
||||
When an extraction PR sits open longer than STALE_THRESHOLD_MINUTES with claims_count=0, it indicates:
|
||||
- Extraction failed (model couldn't extract anything useful)
|
||||
- Batch job stalled (no claims written)
|
||||
- Source material is empty/junk
|
||||
|
||||
Auto-closing prevents zombie PRs from blocking the pipeline.
|
||||
Logs each close for root cause analysis (model failures, bad sources, etc.).
|
||||
|
||||
Epimetheus owns this module.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from . import config, db
|
||||
from .forgejo import api, repo_path
|
||||
from .pr_state import close_pr
|
||||
|
||||
logger = logging.getLogger("pipeline.stale_pr")
|
||||
|
||||
STALE_THRESHOLD_MINUTES = 45
|
||||
|
||||
|
||||
async def check_stale_prs(conn) -> tuple[int, int]:
    """Auto-close extraction PRs open longer than STALE_THRESHOLD_MINUTES with zero claims.

    A PR qualifies when its status is 'open', its commit_type is 'extract',
    it was created more than STALE_THRESHOLD_MINUTES ago, and its source row
    has claims_count of 0 (or there is no matching source row at all).

    Args:
        conn: Database connection; rows are read by column name.

    Returns:
        (stale_closed, stale_errors) — count of closed PRs and close failures.
        A failure is either close_pr returning a falsy value or any exception
        raised while handling that PR; neither stops the loop.
    """
    stale_closed = 0
    stale_errors = 0

    # Find extraction PRs: open longer than the threshold, source has 0 claims.
    # LEFT JOIN + COALESCE so a PR without a sources row counts as zero claims.
    stale_prs = conn.execute(
        """SELECT p.number, p.branch, p.source_path, p.created_at
           FROM prs p
           LEFT JOIN sources s ON p.source_path = s.path
           WHERE p.status = 'open'
             AND p.commit_type = 'extract'
             AND datetime(p.created_at) < datetime('now', '-' || ? || ' minutes')
             AND COALESCE(s.claims_count, 0) = 0""",
        (STALE_THRESHOLD_MINUTES,),
    ).fetchall()

    for pr in stale_prs:
        pr_num = pr["number"]
        source_path = pr["source_path"] or "unknown"

        try:
            closed = await close_pr(conn, pr_num,
                                    last_error=f"stale: no claims after {STALE_THRESHOLD_MINUTES} min")
            if not closed:
                stale_errors += 1
                logger.warning(
                    "Failed to close stale extraction PR #%d (%s, %s)",
                    pr_num, source_path, pr["branch"],
                )
                continue

            # Audit trail for root-cause analysis. "open_minutes" records the
            # threshold that triggered the close, not the PR's actual age.
            db.audit(
                conn,
                "watchdog",
                "stale_pr_closed",
                json.dumps({
                    "pr": pr_num,
                    "branch": pr["branch"],
                    "source": source_path,
                    "open_minutes": STALE_THRESHOLD_MINUTES,
                }),
            )
            stale_closed += 1
            logger.info(
                "WATCHDOG: closed stale extraction PR #%d (no claims after %d min): %s",
                pr_num, STALE_THRESHOLD_MINUTES, source_path,
            )

        except Exception as e:
            # Best effort: one failing PR must not block the rest of the sweep.
            stale_errors += 1
            logger.warning(
                "Stale PR close exception for #%d: %s",
                pr_num, e,
            )

    return stale_closed, stale_errors
|
||||
|
|
@ -24,12 +24,13 @@ from pathlib import Path
|
|||
|
||||
from . import config, db
|
||||
from .forgejo import api as forgejo_api, get_agent_token, get_pr_diff, repo_path
|
||||
from .pr_state import close_pr, reset_for_reeval, start_fixing
|
||||
from .llm import openrouter_call
|
||||
|
||||
logger = logging.getLogger("pipeline.substantive_fixer")
|
||||
|
||||
# Issue type routing
|
||||
FIXABLE_TAGS = {"confidence_miscalibration", "title_overclaims", "scope_error", "frontmatter_schema"}
|
||||
FIXABLE_TAGS = {"confidence_miscalibration", "title_overclaims", "scope_error", "frontmatter_schema", "date_errors"}
|
||||
CONVERTIBLE_TAGS = {"near_duplicate"}
|
||||
UNFIXABLE_TAGS = {"factual_discrepancy"}
|
||||
|
||||
|
|
@ -78,6 +79,8 @@ def _build_fix_prompt(
|
|||
issue_descriptions.append("TITLE: Reviewer says the title asserts more than the evidence supports.")
|
||||
elif tag == "scope_error":
|
||||
issue_descriptions.append("SCOPE: Reviewer says the claim needs explicit scope qualification.")
|
||||
elif tag == "date_errors":
|
||||
issue_descriptions.append("DATES: Reviewer flagged incorrect, missing, or inconsistent dates in the claim. Check created dates, event dates cited in the body, and any temporal claims against the source material.")
|
||||
elif tag == "near_duplicate":
|
||||
issue_descriptions.append("DUPLICATE: Reviewer says this substantially duplicates an existing claim.")
|
||||
|
||||
|
|
@ -223,20 +226,10 @@ def _classify_substantive(issues: list[str]) -> str:
|
|||
|
||||
async def _fix_pr(conn, pr_number: int) -> dict:
|
||||
"""Attempt a substantive fix on a single PR. Returns result dict."""
|
||||
# Atomic claim
|
||||
cursor = conn.execute(
|
||||
"UPDATE prs SET status = 'fixing', last_attempt = datetime('now') WHERE number = ? AND status = 'open'",
|
||||
(pr_number,),
|
||||
)
|
||||
if cursor.rowcount == 0:
|
||||
# Atomic claim — prevent concurrent fixers and evaluators
|
||||
if not start_fixing(conn, pr_number):
|
||||
return {"pr": pr_number, "skipped": True, "reason": "not_open"}
|
||||
|
||||
# Increment fix attempts
|
||||
conn.execute(
|
||||
"UPDATE prs SET fix_attempts = COALESCE(fix_attempts, 0) + 1 WHERE number = ?",
|
||||
(pr_number,),
|
||||
)
|
||||
|
||||
row = conn.execute(
|
||||
"SELECT branch, source_path, domain, eval_issues, fix_attempts FROM prs WHERE number = ?",
|
||||
(pr_number,),
|
||||
|
|
@ -269,10 +262,7 @@ async def _fix_pr(conn, pr_number: int) -> dict:
|
|||
|
||||
if classification == "droppable":
|
||||
logger.info("PR #%d: droppable (%s) — closing", pr_number, issues)
|
||||
conn.execute(
|
||||
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||
(f"droppable: {issues}", pr_number),
|
||||
)
|
||||
await close_pr(conn, pr_number, last_error=f"droppable: {issues}")
|
||||
return {"pr": pr_number, "action": "closed_droppable", "issues": issues}
|
||||
|
||||
# Refresh main worktree for source read (Ganymede: ensure freshness)
|
||||
|
|
@ -300,11 +290,8 @@ async def _fix_pr(conn, pr_number: int) -> dict:
|
|||
conn, pr_number, claim_files, domain,
|
||||
)
|
||||
if result.get("converted"):
|
||||
conn.execute(
|
||||
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||
(f"auto-enriched: {result['target_claim']} (sim={result['similarity']:.2f})", pr_number),
|
||||
)
|
||||
await forgejo_api("PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"})
|
||||
await close_pr(conn, pr_number,
|
||||
last_error=f"auto-enriched: {result['target_claim']} (sim={result['similarity']:.2f})")
|
||||
await forgejo_api("POST", repo_path(f"issues/{pr_number}/comments"), {
|
||||
"body": (
|
||||
f"**Auto-converted:** Evidence from this PR enriched "
|
||||
|
|
@ -333,7 +320,7 @@ async def _fix_pr(conn, pr_number: int) -> dict:
|
|||
fixed_any = False
|
||||
for filepath, content in claim_files.items():
|
||||
prompt = _build_fix_prompt(content, review_text, issues, source_content, domain_index)
|
||||
result = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||
result, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||
|
||||
if not result:
|
||||
logger.warning("PR #%d: fix LLM call failed for %s", pr_number, filepath)
|
||||
|
|
@ -377,7 +364,7 @@ async def _fix_pr(conn, pr_number: int) -> dict:
|
|||
# Write fixed files
|
||||
for filepath, content in claim_files.items():
|
||||
prompt = _build_fix_prompt(content, review_text, issues, source_content, domain_index)
|
||||
fixed_content = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||
fixed_content, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=120, max_tokens=4096)
|
||||
if fixed_content and not fixed_content.strip().startswith("{"):
|
||||
full_path = Path(worktree_path) / filepath
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
@ -392,18 +379,7 @@ async def _fix_pr(conn, pr_number: int) -> dict:
|
|||
return {"pr": pr_number, "skipped": True, "reason": "nothing_to_commit"}
|
||||
|
||||
# Reset eval state BEFORE push (same pattern as fixer.py)
|
||||
conn.execute(
|
||||
"""UPDATE prs SET
|
||||
status = 'open',
|
||||
eval_attempts = 0,
|
||||
eval_issues = '[]',
|
||||
tier0_pass = NULL,
|
||||
domain_verdict = 'pending',
|
||||
leo_verdict = 'pending',
|
||||
last_error = NULL
|
||||
WHERE number = ?""",
|
||||
(pr_number,),
|
||||
)
|
||||
reset_for_reeval(conn, pr_number)
|
||||
|
||||
rc, out = await _git("push", "origin", branch, cwd=worktree_path, timeout=30)
|
||||
if rc != 0:
|
||||
|
|
@ -497,13 +473,7 @@ async def _auto_convert_near_duplicate(
|
|||
|
||||
async def _close_and_reextract(conn, pr_number: int, issues: list[str]):
|
||||
"""Close PR and mark source for re-extraction with feedback."""
|
||||
await forgejo_api(
|
||||
"PATCH", repo_path(f"pulls/{pr_number}"), {"state": "closed"},
|
||||
)
|
||||
conn.execute(
|
||||
"UPDATE prs SET status = 'closed', last_error = ? WHERE number = ?",
|
||||
(f"unfixable: {', '.join(issues)}", pr_number),
|
||||
)
|
||||
await close_pr(conn, pr_number, last_error=f"unfixable: {', '.join(issues)}")
|
||||
conn.execute(
|
||||
"""UPDATE sources SET status = 'needs_reextraction', feedback = ?,
|
||||
updated_at = datetime('now')
|
||||
|
|
@ -524,7 +494,7 @@ async def _flag_for_leo_review(
|
|||
# Use LLM to identify candidate matches
|
||||
if domain_index:
|
||||
prompt = _build_fix_prompt(first_claim, review_text, ["near_duplicate"], None, domain_index)
|
||||
result = await openrouter_call(FIX_MODEL, prompt, timeout_sec=60, max_tokens=1024)
|
||||
result, _usage = await openrouter_call(FIX_MODEL, prompt, timeout_sec=60, max_tokens=1024)
|
||||
candidates_text = result or "Could not identify candidates."
|
||||
else:
|
||||
candidates_text = "No domain index available."
|
||||
|
|
|
|||
|
|
@ -140,7 +140,12 @@ def validate_schema(fm: dict) -> list[str]:
|
|||
valid_conf = schema.get("valid_confidence")
|
||||
confidence = fm.get("confidence")
|
||||
if valid_conf and confidence and confidence not in valid_conf:
|
||||
violations.append(f"invalid_confidence:{confidence}")
|
||||
# Common LLM aliases — normalize before failing
|
||||
_CONFIDENCE_ALIASES = {"high": "likely", "medium": "experimental", "low": "speculative", "very high": "proven", "moderate": "experimental"}
|
||||
if isinstance(confidence, str) and confidence.lower().strip() in _CONFIDENCE_ALIASES:
|
||||
pass # Fixable by post-extract or fixer — don't gate on this
|
||||
else:
|
||||
violations.append(f"invalid_confidence:{confidence}")
|
||||
|
||||
desc = fm.get("description")
|
||||
if isinstance(desc, str) and len(desc.strip()) < 10:
|
||||
|
|
@ -550,6 +555,16 @@ def tier05_mechanical_check(diff: str, existing_claims: set[str] | None = None)
|
|||
is_new = filepath in new_files
|
||||
|
||||
if is_new:
|
||||
# Strip code fences — LLM agents sometimes wrap content in ```markdown or ```yaml
|
||||
stripped = content.strip()
|
||||
if stripped.startswith("```"):
|
||||
first_nl = stripped.find("\n")
|
||||
if first_nl != -1:
|
||||
stripped = stripped[first_nl + 1:]
|
||||
if stripped.endswith("```"):
|
||||
stripped = stripped[:-3].strip()
|
||||
content = stripped
|
||||
|
||||
fm, body = parse_frontmatter(content)
|
||||
if fm is None:
|
||||
issues.append("frontmatter_schema")
|
||||
|
|
@ -620,6 +635,27 @@ async def validate_pr(conn, pr_number: int) -> dict:
|
|||
# Extract claim files (domains/, core/, foundations/)
|
||||
claim_files = extract_claim_files_from_diff(diff)
|
||||
|
||||
# ── Backfill description (claim titles) if missing ──
|
||||
# discover_external_prs creates rows without description. Extract H1 titles
|
||||
# from the diff so the dashboard shows what the PR actually contains.
|
||||
existing_desc = conn.execute(
|
||||
"SELECT description FROM prs WHERE number = ?", (pr_number,)
|
||||
).fetchone()
|
||||
if existing_desc and not (existing_desc["description"] or "").strip() and claim_files:
|
||||
titles = []
|
||||
for _fp, content in claim_files.items():
|
||||
for line in content.split("\n"):
|
||||
if line.startswith("# ") and len(line) > 3:
|
||||
titles.append(line[2:].strip())
|
||||
break
|
||||
if titles:
|
||||
desc = " | ".join(titles)
|
||||
conn.execute(
|
||||
"UPDATE prs SET description = ? WHERE number = ? AND (description IS NULL OR description = '')",
|
||||
(desc, pr_number),
|
||||
)
|
||||
logger.info("PR #%d: backfilled description with %d claim titles", pr_number, len(titles))
|
||||
|
||||
# ── Tier 0: per-claim validation ──
|
||||
# Only validates NEW files (not modified). Modified files have partial content
|
||||
# from diffs (only + lines) — frontmatter parsing fails on partial content,
|
||||
|
|
|
|||
100
lib/watchdog.py
100
lib/watchdog.py
|
|
@ -19,6 +19,7 @@ import logging
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from . import config, db
|
||||
from .stale_pr import check_stale_prs
|
||||
|
||||
logger = logging.getLogger("pipeline.watchdog")
|
||||
|
||||
|
|
@ -103,17 +104,94 @@ async def watchdog_check(conn) -> dict:
|
|||
"action": "GC should auto-close these — check fixer.py GC logic",
|
||||
})
|
||||
|
||||
# 5. Tier0 blockage: many PRs with tier0_pass=0 (potential validation bug)
|
||||
# 5. Tier0 blockage: auto-reset stuck PRs with retry cap
|
||||
MAX_TIER0_RESETS = 3
|
||||
TIER0_RESET_COOLDOWN_S = 3600
|
||||
tier0_blocked = conn.execute(
|
||||
"SELECT COUNT(*) as n FROM prs WHERE status = 'open' AND tier0_pass = 0"
|
||||
).fetchone()["n"]
|
||||
if tier0_blocked >= 5:
|
||||
issues.append({
|
||||
"type": "tier0_blockage",
|
||||
"severity": "warning",
|
||||
"detail": f"{tier0_blocked} PRs blocked at tier0_pass=0",
|
||||
"action": "Check validate.py — may be the modified-file or wiki-link bug recurring",
|
||||
})
|
||||
"SELECT number, branch FROM prs WHERE status = 'open' AND tier0_pass = 0"
|
||||
).fetchall()
|
||||
|
||||
if tier0_blocked:
|
||||
reset_count = 0
|
||||
permanent_count = 0
|
||||
|
||||
for pr in tier0_blocked:
|
||||
row = conn.execute(
|
||||
"""SELECT COUNT(*) as n, MAX(timestamp) as last_ts FROM audit_log
|
||||
WHERE stage = 'watchdog' AND event = 'tier0_reset'
|
||||
AND json_extract(detail, '$.pr') = ?""",
|
||||
(pr["number"],),
|
||||
).fetchone()
|
||||
prior_resets = row["n"]
|
||||
|
||||
if prior_resets >= MAX_TIER0_RESETS:
|
||||
permanent_count += 1
|
||||
continue
|
||||
|
||||
last_reset = row["last_ts"]
|
||||
|
||||
if last_reset:
|
||||
try:
|
||||
last_ts = datetime.fromisoformat(last_reset).replace(tzinfo=timezone.utc)
|
||||
age = (datetime.now(timezone.utc) - last_ts).total_seconds()
|
||||
if age < TIER0_RESET_COOLDOWN_S:
|
||||
continue
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
conn.execute(
|
||||
"UPDATE prs SET tier0_pass = NULL WHERE number = ?",
|
||||
(pr["number"],),
|
||||
)
|
||||
db.audit(
|
||||
conn, "watchdog", "tier0_reset",
|
||||
json.dumps({
|
||||
"pr": pr["number"],
|
||||
"branch": pr["branch"],
|
||||
"attempt": prior_resets + 1,
|
||||
"max": MAX_TIER0_RESETS,
|
||||
}),
|
||||
)
|
||||
reset_count += 1
|
||||
logger.info(
|
||||
"WATCHDOG: auto-reset tier0 for PR #%d (attempt %d/%d)",
|
||||
pr["number"], prior_resets + 1, MAX_TIER0_RESETS,
|
||||
)
|
||||
|
||||
if reset_count:
|
||||
issues.append({
|
||||
"type": "tier0_reset",
|
||||
"severity": "info",
|
||||
"detail": f"Auto-reset {reset_count} PRs stuck at tier0_pass=0 for re-validation",
|
||||
"action": "Monitor — if same PRs fail again, check validate.py",
|
||||
})
|
||||
if permanent_count:
|
||||
issues.append({
|
||||
"type": "tier0_permanent_failure",
|
||||
"severity": "warning",
|
||||
"detail": f"{permanent_count} PRs exhausted {MAX_TIER0_RESETS} tier0 retries — manual intervention needed",
|
||||
"action": "Inspect PR content or close stale PRs",
|
||||
})
|
||||
|
||||
# 6. Stale extraction PRs: open >30 min with no claim files
|
||||
try:
|
||||
stale_closed, stale_errors = await check_stale_prs(conn)
|
||||
if stale_closed > 0:
|
||||
issues.append({
|
||||
"type": "stale_prs_closed",
|
||||
"severity": "info",
|
||||
"detail": f"Auto-closed {stale_closed} stale extraction PRs (no claims after 30 min)",
|
||||
"action": "Check batch-extract logs for extraction failures",
|
||||
})
|
||||
if stale_errors > 0:
|
||||
issues.append({
|
||||
"type": "stale_pr_close_failed",
|
||||
"severity": "warning",
|
||||
"detail": f"Failed to close {stale_errors} stale PRs",
|
||||
"action": "Check Forgejo API connectivity",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Stale PR check failed: %s", e)
|
||||
|
||||
# Log issues
|
||||
healthy = len(issues) == 0
|
||||
|
|
@ -124,7 +202,7 @@ async def watchdog_check(conn) -> dict:
|
|||
else:
|
||||
logger.info("WATCHDOG: %s — %s", issue["type"], issue["detail"])
|
||||
|
||||
return {"healthy": healthy, "issues": issues, "checks_run": 5}
|
||||
return {"healthy": healthy, "issues": issues, "checks_run": 6}
|
||||
|
||||
|
||||
async def watchdog_cycle(conn, max_workers=None) -> tuple[int, int]:
|
||||
|
|
|
|||
113
ops/backfill-contributor-roles.py
Normal file
113
ops/backfill-contributor-roles.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Backfill contributor role counts from prs.commit_type.
|
||||
|
||||
Resets all role counts to 0, then re-derives them from the prs table's
|
||||
commit_type column using the COMMIT_TYPE_TO_ROLE mapping. This corrects
|
||||
the bug where all contributors were recorded as 'extractor' regardless
|
||||
of their actual commit_type.
|
||||
|
||||
Usage:
|
||||
python3 ops/backfill-contributor-roles.py [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sqlite3
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from lib.contributor import COMMIT_TYPE_TO_ROLE, commit_type_to_role
|
||||
|
||||
DB_PATH = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
|
||||
|
||||
def backfill(db_path: str, dry_run: bool = False):
    """Re-derive contributor role counts from merged PRs' commit_type.

    Reads every merged PR that has an agent, maps its commit_type to a role
    with commit_type_to_role, tallies per-agent counts and writes them to the
    contributors table. Progress is printed to stdout.

    Args:
        db_path: Path to the SQLite pipeline database.
        dry_run: When True, only print the tallies; nothing is written.
    """
    conn = sqlite3.connect(db_path)
    # try/finally so the connection is released even if a statement raises;
    # uncommitted changes are then discarded when it closes.
    try:
        conn.row_factory = sqlite3.Row

        # Get all merged PRs with commit_type and agent
        prs = conn.execute("""
            SELECT number, commit_type, agent, branch
            FROM prs
            WHERE status = 'merged' AND agent IS NOT NULL
            ORDER BY number
        """).fetchall()

        print(f"Processing {len(prs)} merged PRs...")

        # Reset all role counts
        # NOTE(review): reviewer_count is tallied and written below but is not
        # reset here — confirm whether that asymmetry is intentional.
        if not dry_run:
            conn.execute("""
                UPDATE contributors SET
                    extractor_count = 0,
                    challenger_count = 0,
                    synthesizer_count = 0,
                    sourcer_count = 0
            """)
            print("Reset all role counts to 0")

        # Tally roles from commit_type
        role_counts: dict[str, dict[str, int]] = {}
        for pr in prs:
            agent = pr["agent"].lower() if pr["agent"] else None
            if not agent or agent in ("external", "pipeline"):
                continue

            # A missing commit_type is treated as an extraction.
            commit_type = pr["commit_type"] or "extract"
            role = commit_type_to_role(commit_type)

            if agent not in role_counts:
                role_counts[agent] = {
                    "extractor_count": 0, "challenger_count": 0,
                    "synthesizer_count": 0, "sourcer_count": 0,
                    "reviewer_count": 0,
                }
            role_col = f"{role}_count"
            # Roles without a matching counter column are ignored.
            if role_col in role_counts[agent]:
                role_counts[agent][role_col] += 1

        # Apply tallied counts
        for handle, counts in sorted(role_counts.items()):
            non_zero = {k: v for k, v in counts.items() if v > 0}
            print(f"  {handle}: {non_zero or '(no knowledge PRs)'}")
            if not dry_run and non_zero:
                # Column names come from the fixed dict above; values are bound
                # as parameters rather than interpolated into the SQL text.
                set_clauses = ", ".join(f"{col} = ?" for col in non_zero)
                conn.execute(
                    f"UPDATE contributors SET {set_clauses}, updated_at = datetime('now') WHERE handle = ?",
                    (*non_zero.values(), handle),
                )

        if not dry_run:
            conn.commit()
            print("\nBackfill committed.")
        else:
            print("\n[DRY RUN] No changes made.")

        # Print summary
        print("\nRole distribution across all contributors:")
        if not dry_run:
            rows = conn.execute("""
                SELECT handle, extractor_count, challenger_count, synthesizer_count,
                       sourcer_count, reviewer_count
                FROM contributors
                ORDER BY (extractor_count + challenger_count + synthesizer_count) DESC
            """).fetchall()
            labels = (
                ("extract", "extractor_count"),
                ("challenge", "challenger_count"),
                ("synthesize", "synthesizer_count"),
                ("source", "sourcer_count"),
                ("review", "reviewer_count"),
            )
            for r in rows:
                parts = [f"{label}:{r[col]}" for label, col in labels if r[col]]
                if parts:
                    print(f"  {r['handle']}: {', '.join(parts)}")
    finally:
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--db", default=DB_PATH)
|
||||
args = parser.parse_args()
|
||||
backfill(args.db, args.dry_run)
|
||||
92
research/entity-session.sh
Executable file
92
research/entity-session.sh
Executable file
|
|
@ -0,0 +1,92 @@
|
|||
#!/bin/bash
# Entity population session: runs a Claude session for one agent on a fresh
# branch of the teleo-codex workspace, commits the generated entity files,
# pushes the branch and opens a pull request against main.
set -e

AGENT="rio"
BRANCH="${AGENT}/entity-population-$(date +%Y-%m-%d)"
WORKSPACE="/opt/teleo-eval/workspaces/entity-${AGENT}"
LOG="/opt/teleo-eval/logs/entity-${AGENT}.log"
BRIEF="/opt/teleo-eval/entity-research-brief.md"
SCHEMA="/opt/teleo-eval/entity-schema.md"

# Timestamped message to stdout and appended to the session log.
log() { echo "[$(date -Iseconds)] $1" | tee -a "$LOG"; }

# Setup workspace
if [ ! -d "$WORKSPACE" ]; then
    log "Cloning fresh workspace..."
    git clone http://localhost:3000/teleo/teleo-codex.git "$WORKSPACE"
fi

cd "$WORKSPACE"
git checkout main
git pull origin main
git checkout -b "$BRANCH"

# Copy schema into workspace (create the directory first: under `set -e` a
# missing schemas/ would abort the whole session at the cp)
mkdir -p schemas
cp "$SCHEMA" schemas/entity.md

# Create entities directory
mkdir -p entities/internet-finance

log "On branch $BRANCH"
log "Starting Claude entity population session..."

# Build the prompt
PROMPT="You are Rio, the internet finance domain agent for the Teleo Codex knowledge base.

Your task: populate the first entity files for the knowledge base, focusing on the futarchic ecosystem.

RESEARCH BRIEF:
$(cat "$BRIEF")

ENTITY SCHEMA:
$(cat "$SCHEMA")

INSTRUCTIONS:
1. Read the research brief carefully
2. Read the entity schema at schemas/entity.md
3. Read existing claims in domains/internet-finance/ for context
4. Read relevant source archives in inbox/archive/
5. Use web search to find current data for each entity (market caps, metrics, recent events)
6. Create entity files in entities/internet-finance/ following the schema exactly
7. Start with the companies and people listed in the brief
8. Create the market entity for futarchic markets
9. Make sure all wiki links point to real existing files
10. Add timeline events with dates
11. Include competitive positioning for companies
12. Include known positions and credibility basis for people

Create all 12 entities listed in the brief. Quality over speed."

# Run Claude (90-minute ceiling; `|| true` so a timeout or non-zero exit still
# falls through to the commit step with whatever was produced)
timeout 5400 /home/teleo/.local/bin/claude -p "$PROMPT" \
    --model opus \
    --allowedTools Read,Write,Edit,Glob,Grep,WebSearch,WebFetch \
    2>&1 | tee -a "$LOG" || true

# Commit and push
log "Session complete. Committing..."
git add entities/ schemas/entity.md
ENTITY_COUNT=$(find entities/ -name "*.md" | wc -l)
git commit -m "rio: populate ${ENTITY_COUNT} entity files — futarchic ecosystem

- What: First entity population using new entity schema
- Why: Cory directive — agents need industry analysis, not just claims
- Schema: entities track companies, people, markets with temporal data

Pentagon-Agent: Rio <CE7B8202-2877-4C70-8AAB-B05F832F50EA>" || log "Nothing to commit"

git push -u origin "$BRANCH" || log "Push failed"

# Create PR
# The dict keys passed to .get() must be quoted Python strings; single quotes
# are safe here because the snippet is inside a double-quoted shell argument.
PR_URL=$(curl -s -X POST "http://localhost:3000/api/v1/repos/teleo/teleo-codex/pulls" \
    -H "Authorization: token $(cat /opt/teleo-eval/secrets/forgejo-admin-token)" \
    -H "Content-Type: application/json" \
    -d "{
    \"title\": \"rio: entity schema + ${ENTITY_COUNT} entity files — futarchic ecosystem\",
    \"body\": \"## Summary\n\nNew entity schema + first population of entity files for the futarchic ecosystem.\n\nEntities track companies, people, and markets as dynamic objects with temporal attributes — a parallel input to beliefs alongside claims.\n\n### Entities created:\n- Companies: MetaDAO, Solomon, Ranger Finance, MycoRealms, Futardio, Aave, Polymarket\n- People: Stani Kulechov, Proph3t, Gabriel Shapiro, Felipe Montealegre\n- Markets: Futarchic Markets ecosystem\n\nDesigned by Leo, populated by Rio.\",
    \"head\": \"${BRANCH}\",
    \"base\": \"main\"
  }" | python3 -c "import sys,json; print(json.load(sys.stdin).get('html_url','no url'))")

log "PR opened: $PR_URL"
log "=== Entity session complete for ${AGENT} ==="
|
||||
12
research/prompts/changelog.md
Normal file
12
research/prompts/changelog.md
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
description: Prompt version history — what changed and why
|
||||
---
|
||||
|
||||
## rio-system-v1 (2026-03-31)
|
||||
- Initial extraction from inline string in telegram/response.py
|
||||
- RESEARCH tag tightened: "ONLY use when user explicitly asks about recent activity"
|
||||
- No structural changes from deployed version
|
||||
|
||||
## Planned
|
||||
- rio-system-v2: Query classification injected into system prompt (Layer 2)
|
||||
- eval-grounding-v1: Haiku grounding check prompt (Layer 3)
|
||||
62
research/prompts/rio-system-v1.md
Normal file
62
research/prompts/rio-system-v1.md
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
---
|
||||
version: 1
|
||||
model: anthropic/claude-opus-4
|
||||
author: epimetheus
|
||||
date: 2026-03-31
|
||||
description: Rio's main Telegram response system prompt
|
||||
parent_version: null
|
||||
---
|
||||
|
||||
You are Rio, the Teleo internet finance agent. Your Telegram handle is @FutAIrdBot — that IS you. Users tag @FutAIrdBot to reach you. Never say "I'm not FutAIrdBot." You are also @futaRdIO on X. You have deep knowledge about futarchy, prediction markets, token governance, and the MetaDAO ecosystem.
|
||||
|
||||
## How to sound
|
||||
Write like a sharp analyst talking to peers, not like an AI. Specifically:
|
||||
- Use your knowledge naturally. Don't say "the KB tracks" or "at experimental confidence" or "our claims show." Just state what you know and how confident you are in plain language.
|
||||
- Have a take. You're an analyst, not a summarizer. Say what you actually think.
|
||||
- Before you respond, ask yourself: "Does every sentence here add something the user doesn't already know?" If a sentence just restates context, agrees without adding insight, or pads with filler — cut it. Your goal is signal density, not word count.
|
||||
- Short questions deserve short answers. If someone asks a factual question, give the fact. Don't surround it with caveats, context, and "the honest picture is" framing.
|
||||
- Long answers are fine when the question is genuinely complex or the user asks for depth. But earn every paragraph — each one should contain a distinct insight the previous one didn't cover.
|
||||
- Match the user's energy. If they wrote one line, respond in kind.
|
||||
- Sound human. No em dashes, no "That said", no "It's worth noting." Just say the thing.
|
||||
- No markdown. Plain text only.
|
||||
- When you're uncertain, just say so simply. "I'm not sure about X" beats "we don't have data on this yet."
|
||||
|
||||
## Your learnings (corrections from past conversations — prioritize these over KB data when they conflict)
|
||||
{learnings}
|
||||
|
||||
## What you know about this topic
|
||||
{kb_context}
|
||||
|
||||
{market_section}
|
||||
|
||||
{research_context}
|
||||
|
||||
{x_link_context}
|
||||
|
||||
## Conversation History (NEVER ask a question your history already answers)
|
||||
{conversation_history}
|
||||
|
||||
## The message you're responding to
|
||||
From: @{username}
|
||||
Message: {message}
|
||||
|
||||
Respond now. Be substantive but concise. If they're wrong about something, say so directly. If they know something you don't, tell them it's worth digging into. If they correct you, accept it and build on the correction. Do NOT respond to messages that aren't directed at you — only respond when tagged or replied to.
|
||||
|
||||
IMPORTANT: Special tags you can append at the end of your response (after your main text):
|
||||
|
||||
1. LEARNING: [category] [what you learned]
|
||||
Categories: factual, communication, structured_data
|
||||
Only when genuinely learned something. Most responses have none.
|
||||
NEVER save a learning about what data you do or don't have access to.
|
||||
|
||||
2. RESEARCH: [search query]
|
||||
Triggers a live X search and sends results back to the chat. ONLY use when the user explicitly asks about recent activity, live sentiment, or breaking news that the KB can't answer. Do NOT use for general knowledge questions — if you already answered from KB context, don't also trigger a search.
|
||||
|
||||
3. SOURCE: [description of what to ingest]
|
||||
When a user shares valuable source material (X posts, articles, data). Creates a source file in the ingestion pipeline, attributed to the user. Include the verbatim content — don't alter or summarize the user's contribution. Use this when someone drops a link or shares original analysis worth preserving.
|
||||
|
||||
4. CLAIM: [specific, disagreeable assertion]
|
||||
When a user makes a specific claim with evidence that could enter the KB. Creates a draft claim file attributed to them. Only for genuine claims — not opinions or questions.
|
||||
|
||||
5. CONFIDENCE: [0.0-1.0]
|
||||
ALWAYS include this tag. Rate how well the KB context above actually helped you answer this question. 1.0 = KB had exactly what was needed. 0.5 = KB had partial/tangential info. 0.0 = KB had nothing relevant, you answered from general knowledge. This is for internal audit only — never visible to users.
|
||||
480
research/research-session.sh
Executable file
480
research/research-session.sh
Executable file
|
|
@ -0,0 +1,480 @@
|
|||
#!/bin/bash
|
||||
# Run a self-directed research session for one agent.
|
||||
# Usage: ./research-session.sh <agent-name>
|
||||
# Example: ./research-session.sh clay
|
||||
#
|
||||
# What it does:
|
||||
# 1. Pulls latest tweets from the agent's network accounts (X API)
|
||||
# 2. Gives Claude the agent's identity, beliefs, and current KB state
|
||||
# 3. Agent picks a research direction and archives sources with notes
|
||||
# 4. Commits source archives to a branch, pushes, opens PR
|
||||
# 5. Extract cron picks up the unprocessed sources separately
|
||||
#
|
||||
# The researcher never extracts — a separate Claude instance does that.
|
||||
# This prevents motivated reasoning in extraction.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
AGENT="${1:?Usage: $0 <agent-name>}"
|
||||
REPO_DIR="/opt/teleo-eval/workspaces/research-${AGENT}"
|
||||
FORGEJO_URL="http://localhost:3000"
|
||||
FORGEJO_ADMIN_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token)
|
||||
AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token" 2>/dev/null || echo "$FORGEJO_ADMIN_TOKEN")
|
||||
TWITTER_API_KEY=$(cat /opt/teleo-eval/secrets/twitterapi-io-key)
|
||||
CLAUDE_BIN="/home/teleo/.local/bin/claude"
|
||||
LOG_DIR="/opt/teleo-eval/logs"
|
||||
LOG="$LOG_DIR/research-${AGENT}.log"
|
||||
LOCKFILE="/tmp/research-${AGENT}.lock"
|
||||
DATE=$(date +%Y-%m-%d)
|
||||
BRANCH="${AGENT}/research-${DATE}"
|
||||
RAW_DIR="/opt/teleo-eval/research-raw/${AGENT}"
|
||||
|
||||
log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; }
|
||||
|
||||
# --- Agent State ---
|
||||
STATE_LIB="/opt/teleo-eval/ops/agent-state/lib-state.sh"
|
||||
if [ -f "$STATE_LIB" ]; then
|
||||
source "$STATE_LIB"
|
||||
HAS_STATE=true
|
||||
SESSION_ID="${AGENT}-$(date +%Y%m%d-%H%M%S)"
|
||||
else
|
||||
HAS_STATE=false
|
||||
log "WARN: agent-state lib not found, running without state"
|
||||
fi
|
||||
|
||||
# --- Lock (prevent concurrent sessions for same agent) ---
|
||||
if [ -f "$LOCKFILE" ]; then
|
||||
pid=$(cat "$LOCKFILE" 2>/dev/null)
|
||||
if kill -0 "$pid" 2>/dev/null; then
|
||||
log "SKIP: research session already running for $AGENT (pid $pid)"
|
||||
exit 0
|
||||
fi
|
||||
log "WARN: stale lockfile for $AGENT, removing"
|
||||
rm -f "$LOCKFILE"
|
||||
fi
|
||||
echo $$ > "$LOCKFILE"
|
||||
TWEET_FILE="/tmp/research-tweets-${AGENT}.md"
|
||||
trap 'rm -f "$LOCKFILE" "$TWEET_FILE"' EXIT
|
||||
|
||||
log "=== Starting research session for $AGENT ==="
|
||||
|
||||
# --- Ensure directories ---
|
||||
mkdir -p "$RAW_DIR" "$LOG_DIR"
|
||||
|
||||
# --- Clone or update repo ---
|
||||
if [ ! -d "$REPO_DIR/.git" ]; then
|
||||
log "Cloning repo for $AGENT research..."
|
||||
git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" \
|
||||
clone "${FORGEJO_URL}/teleo/teleo-codex.git" "$REPO_DIR" >> "$LOG" 2>&1
|
||||
fi
|
||||
|
||||
cd "$REPO_DIR"
|
||||
git remote set-url origin "${FORGEJO_URL}/teleo/teleo-codex.git" 2>/dev/null || true
|
||||
git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" checkout main >> "$LOG" 2>&1
|
||||
git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" pull --rebase >> "$LOG" 2>&1
|
||||
|
||||
# --- Map agent to domain ---
|
||||
case "$AGENT" in
|
||||
rio) DOMAIN="internet-finance" ;;
|
||||
clay) DOMAIN="entertainment" ;;
|
||||
theseus) DOMAIN="ai-alignment" ;;
|
||||
vida) DOMAIN="health" ;;
|
||||
astra) DOMAIN="space-development" ;;
|
||||
leo) DOMAIN="grand-strategy" ;;
|
||||
*) log "ERROR: Unknown agent $AGENT"; exit 1 ;;
|
||||
esac
|
||||
|
||||
# --- Pull tweets from agent's network ---
|
||||
# Check if agent has a network file in the repo
|
||||
NETWORK_FILE="agents/${AGENT}/network.json"
|
||||
if [ ! -f "$NETWORK_FILE" ]; then
|
||||
log "No network file at $NETWORK_FILE — agent will use KB context to decide what to research"
|
||||
TWEET_DATA=""
|
||||
else
|
||||
log "Pulling tweets from ${AGENT}'s network..."
|
||||
ACCOUNTS=$(python3 -c "
|
||||
import json, sys
|
||||
with open(sys.argv[1]) as f:
|
||||
data = json.load(f)
|
||||
for acct in data.get('accounts', []):
|
||||
if acct.get('tier') in ('core', 'extended'):
|
||||
print(acct['username'])
|
||||
" "$NETWORK_FILE" 2>/dev/null || true)
|
||||
|
||||
TWEET_DATA=""
|
||||
API_CALLS=0
|
||||
API_CACHED=0
|
||||
for USERNAME in $ACCOUNTS; do
|
||||
# Validate username (Twitter handles are alphanumeric + underscore only)
|
||||
if [[ ! "$USERNAME" =~ ^[a-zA-Z0-9_]+$ ]]; then
|
||||
log "WARN: Invalid username '$USERNAME' in network file, skipping"
|
||||
continue
|
||||
fi
|
||||
OUTFILE="$RAW_DIR/${USERNAME}.json"
|
||||
# Only pull if file doesn't exist or is older than 12 hours
|
||||
if [ ! -f "$OUTFILE" ] || [ $(find "$OUTFILE" -mmin +720 2>/dev/null | wc -l) -gt 0 ]; then
|
||||
log "Pulling @${USERNAME}..."
|
||||
curl -s "https://api.twitterapi.io/twitter/user/last_tweets?userName=${USERNAME}" \
|
||||
-H "X-API-Key: ${TWITTER_API_KEY}" \
|
||||
-o "$OUTFILE" 2>/dev/null || {
|
||||
log "WARN: Failed to pull @${USERNAME}"
|
||||
continue
|
||||
}
|
||||
API_CALLS=$((API_CALLS + 1))
|
||||
sleep 2 # Rate limit courtesy
|
||||
else
|
||||
API_CACHED=$((API_CACHED + 1))
|
||||
fi
|
||||
if [ -f "$OUTFILE" ]; then
|
||||
TWEET_DATA="${TWEET_DATA}
|
||||
--- @${USERNAME} tweets ---
|
||||
$(python3 -c "
|
||||
import json, sys
|
||||
try:
|
||||
d = json.load(open(sys.argv[1]))
|
||||
tweets = d.get('tweets', d.get('data', []))
|
||||
for t in tweets[:20]:
|
||||
text = t.get('text', '')[:500]
|
||||
likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
|
||||
date = t.get('createdAt', t.get('created_at', 'unknown'))
|
||||
url = t.get('twitterUrl', t.get('url', ''))
|
||||
print(f'[{date}] ({likes} likes) {text}')
|
||||
print(f' URL: {url}')
|
||||
print()
|
||||
except Exception as e:
|
||||
print(f'Error reading: {e}', file=sys.stderr)
|
||||
" "$OUTFILE" 2>/dev/null || echo "(failed to parse)")"
|
||||
fi
|
||||
done
|
||||
log "API usage: ${API_CALLS} calls, ${API_CACHED} cached for ${AGENT}"
|
||||
# Append to cumulative usage log (create with header if new)
|
||||
USAGE_CSV="/opt/teleo-eval/logs/x-api-usage.csv"
|
||||
if [ ! -f "$USAGE_CSV" ]; then
|
||||
echo "date,agent,api_calls,cached,accounts_total" > "$USAGE_CSV"
|
||||
fi
|
||||
ACCOUNT_COUNT=$(echo "$ACCOUNTS" | wc -w | tr -d ' ')
|
||||
echo "${DATE},${AGENT},${API_CALLS},${API_CACHED},${ACCOUNT_COUNT}" >> "$USAGE_CSV"
|
||||
fi
|
||||
|
||||
# --- Also check for any raw JSON dumps in inbox-raw ---
|
||||
INBOX_RAW="/opt/teleo-eval/inbox-raw/${AGENT}"
|
||||
if [ -d "$INBOX_RAW" ] && ls "$INBOX_RAW"/*.json 2>/dev/null | head -1 > /dev/null; then
|
||||
log "Found raw dumps in $INBOX_RAW"
|
||||
for RAWFILE in "$INBOX_RAW"/*.json; do
|
||||
USERNAME=$(basename "$RAWFILE" .json)
|
||||
TWEET_DATA="${TWEET_DATA}
|
||||
--- @${USERNAME} tweets (from raw dump) ---
|
||||
$(python3 -c "
|
||||
import json, sys
|
||||
try:
|
||||
d = json.load(open(sys.argv[1]))
|
||||
tweets = d.get('tweets', d.get('data', []))
|
||||
for t in tweets[:20]:
|
||||
text = t.get('text', '')[:500]
|
||||
likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
|
||||
date = t.get('createdAt', t.get('created_at', 'unknown'))
|
||||
url = t.get('twitterUrl', t.get('url', ''))
|
||||
print(f'[{date}] ({likes} likes) {text}')
|
||||
print(f' URL: {url}')
|
||||
print()
|
||||
except Exception as e:
|
||||
print(f'Error: {e}', file=sys.stderr)
|
||||
" "$RAWFILE" 2>/dev/null || echo "(failed to parse)")"
|
||||
done
|
||||
fi
|
||||
|
||||
# --- Create branch ---
|
||||
git branch -D "$BRANCH" 2>/dev/null || true
|
||||
git checkout -b "$BRANCH" >> "$LOG" 2>&1
|
||||
log "On branch $BRANCH"
|
||||
|
||||
# --- Pre-session state ---
|
||||
if [ "$HAS_STATE" = true ]; then
|
||||
state_start_session "$AGENT" "$SESSION_ID" "research" "$DOMAIN" "$BRANCH" "sonnet" "5400" > /dev/null 2>&1 || true
|
||||
state_update_report "$AGENT" "researching" "Starting research session ${DATE}" 2>/dev/null || true
|
||||
state_journal_append "$AGENT" "session_start" "session_id=$SESSION_ID" "type=research" "branch=$BRANCH" 2>/dev/null || true
|
||||
log "Agent state: session started ($SESSION_ID)"
|
||||
fi
|
||||
|
||||
# --- Build the research prompt ---
|
||||
# Write tweet data to a temp file so Claude can read it
|
||||
echo "$TWEET_DATA" > "$TWEET_FILE"
|
||||
|
||||
RESEARCH_PROMPT="You are ${AGENT}, a Teleo knowledge base agent. Domain: ${DOMAIN}.
|
||||
|
||||
## Your Task: Self-Directed Research Session
|
||||
|
||||
You have ~90 minutes of compute. Use it wisely.
|
||||
|
||||
### Step 0: Load Operational State (1 min)
|
||||
Read /opt/teleo-eval/agent-state/${AGENT}/memory.md — this is your cross-session operational memory. It contains patterns, dead ends, open questions, and corrections from previous sessions.
|
||||
Read /opt/teleo-eval/agent-state/${AGENT}/tasks.json — check for pending tasks assigned to you.
|
||||
Check /opt/teleo-eval/agent-state/${AGENT}/inbox/ for messages from other agents. Process any high-priority inbox items before choosing your research direction.
|
||||
|
||||
### Step 1: Orient (5 min)
|
||||
Read these files to understand your current state:
|
||||
- agents/${AGENT}/identity.md (who you are)
|
||||
- agents/${AGENT}/beliefs.md (what you believe)
|
||||
- agents/${AGENT}/reasoning.md (how you think)
|
||||
- domains/${DOMAIN}/_map.md (your domain's current claims)
|
||||
|
||||
### Step 2: Identify Your Load-Bearing Beliefs (5 min)
|
||||
Read agents/${AGENT}/beliefs.md. Your beliefs are your generative model — the worldview through which you interpret everything. Identify your KEYSTONE BELIEF: the one existential premise that, if wrong, means your domain loses its reason to be in the collective. This is usually Belief 1.
|
||||
|
||||
Now ask yourself: **what would it take to prove this belief wrong?** What evidence would change your mind? Write down one specific disconfirmation target — a claim, a data point, a counter-argument that would genuinely threaten your keystone belief. You will actively search for this during Step 5.
|
||||
|
||||
This is not an exercise in self-doubt. Beliefs that survive serious challenge are STRONGER. Beliefs that have never been challenged are untested, not proven.
|
||||
|
||||
### Step 3: Review Recent Tweets (10 min)
|
||||
Read ${TWEET_FILE} — these are recent tweets from accounts in your domain.
|
||||
Scan for anything substantive: new claims, evidence, debates, data, counterarguments.
|
||||
Pay special attention to anything that challenges your keystone belief or its grounding claims.
|
||||
|
||||
### Step 4: Check Previous Follow-ups (2 min)
|
||||
Read agents/${AGENT}/musings/ — look for any previous research-*.md files. If they exist, check the 'Follow-up Directions' section at the bottom. These are threads your past self flagged but didn't have time to cover. Give them priority when picking your direction.
|
||||
|
||||
### Step 5: Pick ONE Research Question (5 min)
|
||||
Pick ONE research question — not one topic, but one question that naturally spans multiple accounts and sources. 'How is capital flowing through Solana launchpads?' is one question even though it touches MetaDAO, SOAR, Futardio.
|
||||
|
||||
**Direction selection priority** (active inference — pursue surprise, not confirmation):
|
||||
1. **DISCONFIRMATION SEARCH** — at least one search per session must target your keystone belief's weakest grounding claim or strongest counter-argument. If you find nothing, note that in your journal — absence of counter-evidence is itself informative.
|
||||
2. Follow-up ACTIVE THREADS from previous sessions (your past self flagged these)
|
||||
3. Claims rated 'experimental' or areas where the KB flags live tensions — highest uncertainty = highest learning value
|
||||
4. Evidence that CHALLENGES your beliefs, not confirms them
|
||||
5. Cross-domain connections flagged by other agents
|
||||
6. New developments that change the landscape
|
||||
|
||||
Also read agents/${AGENT}/research-journal.md if it exists — this is your cross-session pattern tracker.
|
||||
|
||||
Write a brief note explaining your choice to: agents/${AGENT}/musings/research-${DATE}.md
|
||||
Include which belief you targeted for disconfirmation and what you searched for.
|
||||
|
||||
### Step 6: Archive Sources (60 min)
|
||||
For each relevant tweet/thread, create an archive file:
|
||||
|
||||
Path: inbox/queue/YYYY-MM-DD-{author-handle}-{brief-slug}.md
|
||||
|
||||
Use this frontmatter:
|
||||
---
|
||||
type: source
|
||||
title: \"Descriptive title\"
|
||||
author: \"Display Name (@handle)\"
|
||||
url: https://original-url
|
||||
date: YYYY-MM-DD
|
||||
domain: ${DOMAIN}
|
||||
secondary_domains: []
|
||||
format: tweet | thread
|
||||
status: unprocessed
|
||||
priority: high | medium | low
|
||||
tags: [topic1, topic2]
|
||||
---
|
||||
|
||||
## Content
|
||||
[Full text of tweet/thread]
|
||||
|
||||
## Agent Notes
|
||||
**Why this matters:** [1-2 sentences]
|
||||
**What surprised me:** [Anything unexpected — the extractor needs this to avoid confirming your priors]
|
||||
**What I expected but didn't find:** [Gaps or missing evidence you noticed]
|
||||
**KB connections:** [Which existing claims relate?]
|
||||
**Extraction hints:** [What claims might an extractor pull?]
|
||||
**Context:** [Who is the author, what debate is this part of?]
|
||||
|
||||
## Curator Notes (structured handoff for extractor)
|
||||
PRIMARY CONNECTION: [exact claim title this source most relates to]
|
||||
WHY ARCHIVED: [what pattern or tension this evidences]
|
||||
EXTRACTION HINT: [what the extractor should focus on — scopes attention]
|
||||
|
||||
### Step 6 Rules:
|
||||
- Archive EVERYTHING substantive, not just what supports your views
|
||||
- Set all sources to status: unprocessed (a DIFFERENT instance will extract)
|
||||
- Flag cross-domain sources with flagged_for_{agent}: [\"reason\"]
|
||||
- Do NOT extract claims yourself — write good notes so the extractor can
|
||||
- Check inbox/queue/ and inbox/archive/ for duplicates before creating new archives
|
||||
- Aim for 5-15 source archives per session
|
||||
|
||||
### Step 7: Flag Follow-up Directions (5 min)
|
||||
At the bottom of your research musing (agents/${AGENT}/musings/research-${DATE}.md), add a section:
|
||||
|
||||
## Follow-up Directions
|
||||
|
||||
Three categories — be specific, not vague:
|
||||
|
||||
### Active Threads (continue next session)
|
||||
- [Thread]: [What to do next, what you'd look for]
|
||||
|
||||
### Dead Ends (don't re-run these)
|
||||
- [What you searched for]: [Why it was empty — saves future you from wasting time]
|
||||
|
||||
### Branching Points (one finding opened multiple directions)
|
||||
- [Finding]: [Direction A vs Direction B — which to pursue first and why]
|
||||
|
||||
### Step 8: Update Research Journal (3 min)
|
||||
Append to agents/${AGENT}/research-journal.md (create if it doesn't exist). This is your cross-session memory — NOT the same as the musing.
|
||||
|
||||
Format:
|
||||
## Session ${DATE}
|
||||
**Question:** [your research question]
|
||||
**Belief targeted:** [which keystone belief you searched to disconfirm]
|
||||
**Disconfirmation result:** [what you found — counter-evidence, absence of counter-evidence, or unexpected complication]
|
||||
**Key finding:** [most important thing you learned]
|
||||
**Pattern update:** [did this session confirm, challenge, or extend a pattern you've been tracking?]
|
||||
**Confidence shift:** [did any of your beliefs get stronger or weaker? Be specific — which belief, which direction, what caused it]
|
||||
|
||||
The journal accumulates session over session. After 5+ sessions, review it for cross-session patterns — when independent sources keep converging on the same observation, that's a claim candidate.
|
||||
|
||||
|
||||
|
||||
### Step 8.5: Write Session Digest (2 min)
|
||||
Write a JSON session digest to /opt/teleo-eval/agent-state/${AGENT}/sessions/${DATE}.json
|
||||
|
||||
This is a structured summary for human review. Be honest about what surprised you and where your confidence shifted. Format:
|
||||
|
||||
{
|
||||
\"agent\": \"${AGENT}\",
|
||||
\"date\": \"${DATE}\",
|
||||
\"research_question\": \"[the question you investigated]\",
|
||||
\"belief_targeted\": \"[which keystone belief you tried to disconfirm]\",
|
||||
\"disconfirmation_result\": \"[what you found — did the belief hold, weaken, or get complicated?]\",
|
||||
\"sources_archived\": [number],
|
||||
\"key_findings\": [
|
||||
\"[most important thing you learned — be specific, not generic]\",
|
||||
\"[second most important, if any]\"
|
||||
],
|
||||
\"surprises\": [
|
||||
\"[what you did NOT expect to find — or expected to find but didn't]\"
|
||||
],
|
||||
\"confidence_shifts\": [
|
||||
{\"belief\": \"[belief title]\", \"direction\": \"stronger|weaker|unchanged\", \"reason\": \"[one sentence why]\"}
|
||||
],
|
||||
\"prs_submitted\": [\"[branch name if you created one, empty array if not]\"],
|
||||
\"follow_ups\": [\"[specific next research directions]\"]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Be concrete. \"Found interesting data\" is useless. \"MetaDAO pass rate dropped from 78% to 52%\" is useful.
|
||||
- Surprises should be genuine — things that updated your model of the world, not things you already expected.
|
||||
- If nothing surprised you, say so honestly — that itself is informative (you may be in a filter bubble).
|
||||
- Confidence shifts: only list beliefs that actually moved. No shift is fine — report \"unchanged\" with why.
|
||||
- This file is for Cory to read each morning. Write for a human who wants to know what you learned.
|
||||
|
||||
### Step 9: Stop
|
||||
When you've finished archiving sources, updating your musing, and writing the research journal entry, STOP. Do not try to commit or push — the script handles all git operations after you finish."
|
||||
|
||||
CASCADE_PROCESSOR="/opt/teleo-eval/ops/agent-state/process-cascade-inbox.py"
|
||||
|
||||
# --- Run Claude research session ---
|
||||
log "Starting Claude research session..."
|
||||
timeout 5400 "$CLAUDE_BIN" -p "$RESEARCH_PROMPT" \
|
||||
--allowedTools 'Read,Write,Edit,Glob,Grep' \
|
||||
--model sonnet \
|
||||
--permission-mode bypassPermissions \
|
||||
>> "$LOG" 2>&1 || {
|
||||
log "WARN: Research session failed or timed out for $AGENT"
|
||||
# Process cascade inbox even on timeout (agent may have read them in Step 0)
|
||||
if [ -f "$CASCADE_PROCESSOR" ]; then
|
||||
python3 "$CASCADE_PROCESSOR" "$AGENT" 2>>"$LOG" || true
|
||||
fi
|
||||
if [ "$HAS_STATE" = true ]; then
|
||||
state_end_session "$AGENT" "timeout" "0" "null" 2>/dev/null || true
|
||||
state_update_report "$AGENT" "idle" "Research session timed out or failed on ${DATE}" 2>/dev/null || true
|
||||
state_update_metrics "$AGENT" "timeout" "0" 2>/dev/null || true
|
||||
state_journal_append "$AGENT" "session_end" "outcome=timeout" "session_id=$SESSION_ID" 2>/dev/null || true
|
||||
log "Agent state: session recorded as timeout"
|
||||
fi
|
||||
git checkout main >> "$LOG" 2>&1
|
||||
exit 1
|
||||
}
|
||||
|
||||
log "Claude session complete"
|
||||
|
||||
# --- Process cascade inbox messages (log completion to pipeline.db) ---
|
||||
if [ -f "$CASCADE_PROCESSOR" ]; then
|
||||
# Best-effort, same as the timeout path: a cascade-processor failure must not
# abort the script (set -e) before the agent's work is committed and pushed.
CASCADE_RESULT=$(python3 "$CASCADE_PROCESSOR" "$AGENT" 2>>"$LOG" || true)
|
||||
[ -n "$CASCADE_RESULT" ] && log "Cascade: $CASCADE_RESULT"
|
||||
fi
|
||||
|
||||
# --- Check for changes ---
|
||||
CHANGED_FILES=$(git status --porcelain)
|
||||
if [ -z "$CHANGED_FILES" ]; then
|
||||
log "No sources archived by $AGENT"
|
||||
if [ "$HAS_STATE" = true ]; then
|
||||
state_end_session "$AGENT" "completed" "0" "null" 2>/dev/null || true
|
||||
state_update_report "$AGENT" "idle" "Research session completed with no new sources on ${DATE}" 2>/dev/null || true
|
||||
state_update_metrics "$AGENT" "completed" "0" 2>/dev/null || true
|
||||
state_journal_append "$AGENT" "session_end" "outcome=no_sources" "session_id=$SESSION_ID" 2>/dev/null || true
|
||||
log "Agent state: session recorded (no sources)"
|
||||
fi
|
||||
git checkout main >> "$LOG" 2>&1
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Stage and commit ---
|
||||
git add inbox/queue/ agents/${AGENT}/musings/ agents/${AGENT}/research-journal.md 2>/dev/null || true
|
||||
|
||||
if git diff --cached --quiet; then
|
||||
log "No valid changes to commit"
|
||||
if [ "$HAS_STATE" = true ]; then
|
||||
state_end_session "$AGENT" "completed" "0" "null" 2>/dev/null || true
|
||||
state_update_report "$AGENT" "idle" "Research session completed with no valid changes on ${DATE}" 2>/dev/null || true
|
||||
state_update_metrics "$AGENT" "completed" "0" 2>/dev/null || true
|
||||
state_journal_append "$AGENT" "session_end" "outcome=no_valid_changes" "session_id=$SESSION_ID" 2>/dev/null || true
|
||||
fi
|
||||
git checkout main >> "$LOG" 2>&1
|
||||
exit 0
|
||||
fi
|
||||
|
||||
AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')
|
||||
# grep -c already prints "0" when nothing matches (and exits 1); only neutralize
# the exit status so the count is not captured as "0" twice.
SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/queue/" || true)
|
||||
git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived
|
||||
|
||||
Pentagon-Agent: ${AGENT_UPPER} <HEADLESS>" >> "$LOG" 2>&1
|
||||
|
||||
# --- Push ---
|
||||
git -c http.extraHeader="Authorization: token $AGENT_TOKEN" push -u origin "$BRANCH" --force >> "$LOG" 2>&1
|
||||
log "Pushed $BRANCH"
|
||||
|
||||
# --- Check for existing PR on this branch ---
|
||||
EXISTING_PR=$(curl -s "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls?state=open" \
|
||||
-H "Authorization: token $AGENT_TOKEN" \
|
||||
| jq -r ".[] | select(.head.ref == \"$BRANCH\") | .number" 2>/dev/null)
|
||||
|
||||
if [ -n "$EXISTING_PR" ]; then
|
||||
log "PR already exists for $BRANCH (#$EXISTING_PR), skipping creation"
|
||||
else
|
||||
# --- Open PR ---
|
||||
PR_JSON=$(jq -n \
|
||||
--arg title "${AGENT}: research session ${DATE}" \
|
||||
--arg body "## Self-Directed Research
|
||||
|
||||
Automated research session for ${AGENT} (${DOMAIN}).
|
||||
|
||||
Sources archived with status: unprocessed — extract cron will handle claim extraction separately.
|
||||
|
||||
Researcher and extractor are different Claude instances to prevent motivated reasoning." \
|
||||
--arg base "main" \
|
||||
--arg head "$BRANCH" \
|
||||
'{title: $title, body: $body, base: $base, head: $head}')
|
||||
|
||||
PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
|
||||
-H "Authorization: token $AGENT_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PR_JSON" 2>&1)
|
||||
|
||||
PR_NUMBER=$(echo "$PR_RESULT" | jq -r '.number // "unknown"' 2>/dev/null || echo "unknown")
|
||||
log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"
|
||||
fi
|
||||
|
||||
# --- Post-session state (success) ---
|
||||
if [ "$HAS_STATE" = true ]; then
|
||||
FINAL_PR="${EXISTING_PR:-${PR_NUMBER:-unknown}}"
|
||||
state_end_session "$AGENT" "completed" "$SOURCE_COUNT" "$FINAL_PR" 2>/dev/null || true
|
||||
state_finalize_report "$AGENT" "idle" "Research session completed: ${SOURCE_COUNT} sources archived" "$SESSION_ID" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "completed" "$SOURCE_COUNT" "$BRANCH" "${FINAL_PR}" 2>/dev/null || true
|
||||
state_update_metrics "$AGENT" "completed" "$SOURCE_COUNT" 2>/dev/null || true
|
||||
state_journal_append "$AGENT" "session_end" "outcome=completed" "sources=$SOURCE_COUNT" "branch=$BRANCH" "pr=$FINAL_PR" 2>/dev/null || true
|
||||
log "Agent state: session finalized (${SOURCE_COUNT} sources, PR #${FINAL_PR})"
|
||||
fi
|
||||
|
||||
# --- Back to main ---
|
||||
git checkout main >> "$LOG" 2>&1
|
||||
log "=== Research session complete for $AGENT ==="
|
||||
212
research/vida-directed-session.sh
Executable file
212
research/vida-directed-session.sh
Executable file
|
|
@ -0,0 +1,212 @@
|
|||
#!/bin/bash
# Directed research session for Vida — MA/Senior Care/International
# Wraps research-session.sh with a custom brief injected into the prompt
# NOTE(review): nothing in this script invokes research-session.sh; the steps
# are implemented inline here — confirm whether "wraps" is still accurate.
#
# -e: exit on the first failing command, -u: unset variables are errors,
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

# --- Configuration ---
AGENT="vida"
MODEL="opus"
# Dedicated clone used only by this agent's research sessions.
REPO_DIR="/opt/teleo-eval/workspaces/research-${AGENT}"
FORGEJO_URL="http://localhost:3000"
# Admin token is used for git clone/push; the agent token is used for PR API calls.
FORGEJO_ADMIN_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token)
AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token")
CLAUDE_BIN="/home/teleo/.local/bin/claude"
LOG="/opt/teleo-eval/logs/research-${AGENT}.log"
LOCKFILE="/tmp/research-${AGENT}.lock"
DATE=$(date +%Y-%m-%d)
# One branch per day for this directed topic.
BRANCH="${AGENT}/research-ma-senior-care-${DATE}"
BRIEF_FILE="/opt/teleo-eval/vida-research-brief.md"
# NOTE(review): DOMAIN is not referenced in the visible part of this script.
DOMAIN="health"

# Append a timestamped line to the session log.
log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; }

# Lock
# A lockfile holding a live PID means another session is running: skip quietly.
# A lockfile whose PID is not running is treated as stale and removed.
if [ -f "$LOCKFILE" ]; then
    pid=$(cat "$LOCKFILE" 2>/dev/null)
    # kill -0 sends no signal; it only tests whether the process can be signalled.
    if kill -0 "$pid" 2>/dev/null; then
        log "SKIP: research session already running for $AGENT (pid $pid)"
        exit 0
    fi
    rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
# The trap is installed only after this process owns the lock, so the SKIP
# path above never deletes another session's lockfile.
trap 'rm -f "$LOCKFILE"' EXIT
|
||||
|
||||
log "=== Starting DIRECTED research session for $AGENT (model: $MODEL) ==="
|
||||
log "Topic: Medicare Advantage, Senior Care, International Comparisons"
|
||||
|
||||
# Ensure repo
# Clone on first use; the admin token is passed as an HTTP header for the clone only.
if [ ! -d "$REPO_DIR/.git" ]; then
    git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" \
        clone "${FORGEJO_URL}/teleo/teleo-codex.git" "$REPO_DIR" >> "$LOG" 2>&1
fi

cd "$REPO_DIR"
# Credential helper supplies the admin token for later fetch/push operations.
git config credential.helper "!f() { echo username=m3taversal; echo password=$FORGEJO_ADMIN_TOKEN; }; f"
git remote set-url origin "${FORGEJO_URL}/teleo/teleo-codex.git" 2>/dev/null || true
git checkout main >> "$LOG" 2>&1
# Bring main up to date. If the rebase-pull fails, fall back to a hard reset.
# `git rebase --abort` exits non-zero when no rebase is in progress (e.g. the
# pull failed before rebasing started); under `set -e` that would terminate
# the script before the reset, so its failure is explicitly ignored.
git pull --rebase >> "$LOG" 2>&1 || {
    git rebase --abort >> "$LOG" 2>&1 || true
    git reset --hard origin/main >> "$LOG" 2>&1
}

# Create branch
# Drop any same-day branch left by an earlier run, then branch from fresh main.
git branch -D "$BRANCH" 2>/dev/null || true
git checkout -b "$BRANCH" >> "$LOG" 2>&1
|
||||
|
||||
# Read the brief
|
||||
BRIEF=$(cat "$BRIEF_FILE")
|
||||
|
||||
RESEARCH_PROMPT="You are Vida, a Teleo knowledge base agent specializing in health and human flourishing.
|
||||
|
||||
## Your Task: Directed Research Session
|
||||
|
||||
You have a SPECIFIC research brief from the collective. This is not self-directed — follow the brief.
|
||||
|
||||
### Step 1: Orient (5 min)
|
||||
Read these files:
|
||||
- agents/vida/identity.md
|
||||
- agents/vida/beliefs.md
|
||||
- agents/vida/reasoning.md
|
||||
- domains/health/_map.md
|
||||
|
||||
### Step 2: Read Your Research Brief
|
||||
|
||||
${BRIEF}
|
||||
|
||||
### Step 3: Research via Web (75 min)
|
||||
|
||||
For each track, use the WebSearch and WebFetch tools to find the specific sources listed in the brief. Archive everything substantive.
|
||||
|
||||
**Search strategy:**
|
||||
- Start with the named sources (MedPAC, KFF, Commonwealth Fund, etc.)
|
||||
- Follow citations to primary data
|
||||
- Look for recent (2024-2026) analysis that synthesizes historical data
|
||||
- Don't just find one article per question — find the BEST source per question
|
||||
|
||||
For each source found, create an archive file at:
|
||||
inbox/archive/YYYY-MM-DD-{author-or-org}-{brief-slug}.md
|
||||
|
||||
Use this frontmatter:
|
||||
---
|
||||
type: source
|
||||
title: \"Descriptive title\"
|
||||
author: \"Author or Organization\"
|
||||
url: https://original-url
|
||||
date: YYYY-MM-DD
|
||||
domain: health
|
||||
secondary_domains: []
|
||||
format: report | paper | article | data
|
||||
status: unprocessed
|
||||
priority: high | medium | low
|
||||
tags: [topic1, topic2]
|
||||
---
|
||||
|
||||
## Content
|
||||
[Key excerpts, data points, findings — enough for an extractor to work with]
|
||||
|
||||
## Agent Notes
|
||||
**Why this matters:** [1-2 sentences connecting to beliefs]
|
||||
**What surprised me:** [Anything unexpected]
|
||||
**KB connections:** [Which existing health claims relate?]
|
||||
**Extraction hints:** [What claims should the extractor focus on?]
|
||||
|
||||
## Curator Notes
|
||||
PRIMARY CONNECTION: [existing claim this most relates to]
|
||||
WHY ARCHIVED: [what gap this fills]
|
||||
EXTRACTION HINT: [scope the extractor's attention]
|
||||
|
||||
### Step 3 Rules:
|
||||
- Archive EVERYTHING substantive — do NOT extract claims yourself
|
||||
- Set all sources to status: unprocessed
|
||||
- Aim for 15-25 source archives across the three tracks
|
||||
- Prioritize Track 1 (MA history) — that's the anchor
|
||||
- Check inbox/archive/ for existing sources before creating duplicates
|
||||
|
||||
### Step 4: Write Research Musing (5 min)
|
||||
Write to agents/vida/musings/research-ma-senior-care-${DATE}.md:
|
||||
- What you found across the three tracks
|
||||
- Key surprises or gaps
|
||||
- Follow-up directions for next session
|
||||
- Which of your beliefs got stronger or weaker
|
||||
|
||||
### Step 5: Update Research Journal (3 min)
|
||||
Append to agents/vida/research-journal.md (create if needed):
|
||||
## Session ${DATE} — Medicare Advantage & Senior Care
|
||||
**Question:** [primary research question]
|
||||
**Key finding:** [most important thing learned]
|
||||
**Confidence shift:** [belief updates]
|
||||
|
||||
### Step 6: Stop
|
||||
When done archiving and writing notes, STOP. Do not commit or push."
|
||||
|
||||
log "Starting Claude Opus session..."
|
||||
# Run the headless Claude session with a hard 5400-second (90-minute) limit.
# Only file and web tools are allowed; all output goes to the session log.
# A failure or timeout is logged but not fatal, so partial output can still
# be committed below.
timeout 5400 "$CLAUDE_BIN" -p "$RESEARCH_PROMPT" \
    --allowedTools 'Read,Write,Edit,Glob,Grep,WebSearch,WebFetch' \
    --model "$MODEL" \
    --permission-mode bypassPermissions \
    >> "$LOG" 2>&1 || {
    log "WARN: Research session failed or timed out"
    # Still try to commit whatever was produced
}

log "Claude session complete"

# Check for changes
# Empty porcelain output means the session produced nothing at all.
CHANGED_FILES=$(git status --porcelain)
if [ -z "$CHANGED_FILES" ]; then
    log "No sources archived"
    git checkout main >> "$LOG" 2>&1
    exit 0
fi

# Stage and commit
# Only the archive, musings and research journal are staged; anything the
# session wrote elsewhere is left out of the commit. Errors (e.g. a path
# that does not exist) are ignored.
git add inbox/archive/ agents/vida/musings/ agents/vida/research-journal.md 2>/dev/null || true

# `git diff --cached --quiet` exits 0 when nothing is staged.
if git diff --cached --quiet; then
    log "No valid changes to commit"
    git checkout main >> "$LOG" 2>&1
    exit 0
fi
|
||||
|
||||
# Count staged files under inbox/archive/.
# `grep -c` already prints "0" when nothing matches but exits 1, so the old
# `|| echo "0"` fallback produced a two-line value ("0" twice). `|| true`
# keeps the single count and stops `set -e`/`pipefail` from aborting.
SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || true)
# Defensive default in case the pipeline produced no output at all.
SOURCE_COUNT="${SOURCE_COUNT:-0}"
git commit -m "vida: directed research — MA, senior care, international comparisons

- ${SOURCE_COUNT} sources archived across 3 tracks
- Track 1: Medicare Advantage history & structure
- Track 2: Senior care infrastructure
- Track 3: International health system comparisons

Pentagon-Agent: Vida <HEADLESS>" >> "$LOG" 2>&1

# Force-push: the branch is recreated from main on every run, so any remote
# copy from an earlier run today is intentionally overwritten.
git push -u origin "$BRANCH" --force >> "$LOG" 2>&1
log "Pushed $BRANCH"
|
||||
|
||||
# Open PR
# Look for an already-open PR from this branch. The branch name is passed to
# jq as a variable instead of being spliced into the program text, only the
# first match is kept, and any curl/jq failure yields an empty result rather
# than aborting the script under `set -e`/`pipefail`.
EXISTING_PR=$(curl -s "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls?state=open" \
    -H "Authorization: token $AGENT_TOKEN" \
    | jq -r --arg branch "$BRANCH" '[.[] | select(.head.ref == $branch) | .number][0] // empty' 2>/dev/null || true)

if [ -n "$EXISTING_PR" ]; then
    log "PR already exists (#$EXISTING_PR)"
else
    # Build the request body with jq so titles/bodies are JSON-escaped correctly.
    PR_JSON=$(jq -n \
        --arg title "vida: directed research — Medicare Advantage, senior care, international comparisons" \
        --arg body "## Directed Research Session

Three-track investigation commissioned by Cory:

**Track 1:** Medicare Advantage — full history from 1965 to present, risk adjustment, market structure, vertical integration
**Track 2:** Senior care infrastructure — home health, PACE, caregiver crisis, aging demographics
**Track 3:** International comparisons — Commonwealth Fund, Singapore, Costa Rica, NHS, Japan LTCI

Sources archived for extraction by the claim pipeline." \
        --arg base "main" \
        --arg head "$BRANCH" \
        '{title: $title, body: $body, base: $base, head: $head}')

    # Keep the raw API response in the log, then confirm a PR number came back
    # before reporting success (mirrors the check in research-session.sh).
    PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
        -H "Authorization: token $AGENT_TOKEN" \
        -H "Content-Type: application/json" \
        -d "$PR_JSON" 2>&1 || true)
    echo "$PR_RESULT" >> "$LOG"
    PR_NUMBER=$(echo "$PR_RESULT" | jq -r '.number // empty' 2>/dev/null || true)
    if [ -n "$PR_NUMBER" ]; then
        log "PR opened (#$PR_NUMBER)"
    else
        log "WARN: PR creation did not return a PR number — see API response above"
    fi
fi

git checkout main >> "$LOG" 2>&1
log "=== Directed research session complete ==="
|
||||
993
reweave.py
Normal file
993
reweave.py
Normal file
|
|
@ -0,0 +1,993 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Orphan Reweave — connect isolated claims via vector similarity + Haiku classification.
|
||||
|
||||
Finds claims with zero incoming links (orphans), uses Qdrant to find semantically
|
||||
similar neighbors, classifies the relationship with Haiku, and writes edges on the
|
||||
neighbor's frontmatter pointing TO the orphan.
|
||||
|
||||
Usage:
|
||||
python3 reweave.py --dry-run # Show what would be connected
|
||||
python3 reweave.py --max-orphans 50 # Process up to 50 orphans
|
||||
python3 reweave.py --threshold 0.72 # Override similarity floor
|
||||
|
||||
Design:
|
||||
- Orphan = zero incoming links (no other claim's supports/challenges/related/depends_on points to it)
|
||||
- Write edge on NEIGHBOR (not orphan) so orphan gains an incoming link
|
||||
- Haiku classifies: supports | challenges | related (>=0.85 confidence for supports/challenges)
|
||||
- reweave_edges parallel field for tooling-readable provenance
|
||||
- Single PR per run for Leo review
|
||||
|
||||
Pentagon-Agent: Epimetheus <0144398e-4ed3-4fe2-95a3-3d72e1abf887>
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||
logger = logging.getLogger("reweave")
|
||||
|
||||
# --- Config ---
|
||||
REPO_DIR = Path(os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/main"))
|
||||
SECRETS_DIR = Path(os.environ.get("SECRETS_DIR", "/opt/teleo-eval/secrets"))
|
||||
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||
QDRANT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "teleo-claims")
|
||||
FORGEJO_URL = os.environ.get("FORGEJO_URL", "http://localhost:3000")
|
||||
|
||||
EMBED_DIRS = ["domains", "core", "foundations", "decisions", "entities"]
|
||||
EDGE_FIELDS = ("supports", "challenges", "challenged_by", "depends_on", "related")
|
||||
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
# Thresholds (from calibration data — Mar 28)
|
||||
DEFAULT_THRESHOLD = 0.55 # Lowered from 0.70 — text-embedding-3-small scores 0.50-0.60 on conceptual matches
|
||||
DEFAULT_MAX_ORPHANS = 50 # Keep PRs reviewable
|
||||
DEFAULT_MAX_NEIGHBORS = 3 # Don't over-connect
|
||||
HAIKU_CONFIDENCE_FLOOR = 0.85 # Below this → default to "related"
|
||||
PER_FILE_EDGE_CAP = 10 # Max total reweave edges per neighbor file
|
||||
|
||||
# Domain processing order: diversity first, internet-finance last (Leo)
|
||||
DOMAIN_PRIORITY = [
|
||||
"ai-alignment", "health", "space-development", "entertainment",
|
||||
"creative-industries", "collective-intelligence", "governance",
|
||||
# internet-finance last — batch-imported futarchy cluster, lower cross-domain value
|
||||
"internet-finance",
|
||||
]
|
||||
|
||||
|
||||
# ─── Orphan Detection ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_frontmatter(path: Path) -> dict | None:
|
||||
"""Parse YAML frontmatter from a markdown file. Returns dict or None."""
|
||||
try:
|
||||
text = path.read_text(errors="replace")
|
||||
except Exception:
|
||||
return None
|
||||
if not text.startswith("---"):
|
||||
return None
|
||||
end = text.find("\n---", 3)
|
||||
if end == -1:
|
||||
return None
|
||||
try:
|
||||
fm = yaml.safe_load(text[3:end])
|
||||
return fm if isinstance(fm, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _get_body(path: Path) -> str:
    """Return the markdown body that follows the frontmatter block.

    An unreadable file yields ``""``. A file without a complete frontmatter
    block is returned whole and unstripped; otherwise the text after the
    closing ``---`` is returned with surrounding whitespace stripped.
    """
    try:
        raw = path.read_text(errors="replace")
    except Exception:
        return ""

    closing = raw.find("\n---", 3) if raw.startswith("---") else -1
    if closing == -1:
        return raw
    # Skip the 4 characters of the "\n---" delimiter itself.
    return raw[closing + 4:].strip()
|
||||
|
||||
|
||||
def _get_edge_targets(path: Path) -> list[str]:
    """Extract all outgoing edge targets from a claim's frontmatter + wiki links.

    Targets come from three places:
      * every field named in EDGE_FIELDS (list or single string value),
      * ``reweave_edges`` entries written by previous runs,
      * ``[[wiki links]]`` in the body after the frontmatter.

    All targets are stripped and lower-cased. The list may contain duplicates.
    """
    targets: list[str] = []

    fm = _parse_frontmatter(path)
    if fm:
        for field in EDGE_FIELDS:
            val = fm.get(field)
            if isinstance(val, list):
                targets.extend(str(v).strip().lower() for v in val if v)
            elif isinstance(val, str) and val.strip():
                targets.append(val.strip().lower())

        # reweave_edges entries are stored as "<title>|<edge_type>|<date>".
        # Index the bare title: the full provenance string can never match a
        # claim-name variant.
        rw = fm.get("reweave_edges")
        if isinstance(rw, list):
            for entry in rw:
                if not entry:
                    continue
                raw = str(entry).strip()
                parts = raw.rsplit("|", 2)
                if len(parts) == 3 and parts[1] in EDGE_FIELDS:
                    raw = parts[0]
                targets.append(raw.strip().lower())

    # Wiki links in body
    try:
        text = path.read_text(errors="replace")
    except OSError:
        # Best-effort: an unreadable file simply contributes no wiki links.
        return targets

    end = text.find("\n---", 3)
    if end > 0:
        targets.extend(
            link.strip().lower() for link in WIKI_LINK_RE.findall(text[end + 4:])
        )
    return targets
|
||||
|
||||
|
||||
def _claim_name_variants(path: Path, repo_root: Path = None) -> list[str]:
    """Return the lower-cased names under which a claim file may be referenced.

    For domains/ai-alignment/rlhf-reward-hacking.md the variants are:
      - the file stem: "rlhf-reward-hacking"
      - the stem with hyphens as spaces: "rlhf reward hacking"
      - the repo-relative path without ".md" (only when repo_root is given
        and the file lies under it)
      - the frontmatter 'name' and 'title' values, when they are non-empty strings

    Every variant is lower-cased, so callers must compare against
    lower-cased link text. Order of the returned list is unspecified.
    """
    stem = path.stem.lower()
    names = {stem, stem.replace("-", " ")}

    # Some edges reference claims by path rather than by name (Ganymede Q1).
    if repo_root:
        try:
            names.add(str(path.relative_to(repo_root)).removesuffix(".md").lower())
        except ValueError:
            pass  # file is not under repo_root

    fm = _parse_frontmatter(path)
    if fm:
        labels = (fm.get(key) for key in ("name", "title"))
        names.update(
            label.strip().lower()
            for label in labels
            if isinstance(label, str) and label.strip()
        )

    return list(names)
|
||||
|
||||
|
||||
def _is_entity(path: Path) -> bool:
    """Tell whether a file is an entity rather than a claim.

    True when the frontmatter declares ``type: entity`` or when one of the
    path's components is exactly "entities". Matching whole components avoids
    false positives on paths such as "domains/entities-overview/".
    """
    fm = _parse_frontmatter(path)
    declared = bool(fm) and fm.get("type") == "entity"
    return declared or "entities" in Path(path).parts
|
||||
|
||||
|
||||
def _same_source(path_a: Path, path_b: Path) -> bool:
    """Report whether two claims were derived from the same source material.

    Used to avoid edges between N claims extracted from one paper, which
    would raise graph density without adding information.

    The source is the frontmatter ``source`` value, falling back to
    ``source_file``. Returns False when either file has no frontmatter or
    either source is empty.
    """
    fm_a = _parse_frontmatter(path_a)
    fm_b = _parse_frontmatter(path_b)
    if not (fm_a and fm_b):
        return False

    def _source_of(fm: dict):
        return fm.get("source") or fm.get("source_file") or ""

    src_a, src_b = _source_of(fm_a), _source_of(fm_b)
    if not (src_a and src_b):
        return False
    return str(src_a).strip() == str(src_b).strip()
|
||||
|
||||
|
||||
def find_all_claims(repo_root: Path) -> list[Path]:
    """Collect every knowledge file (claim, framework, entity, decision) in the KB.

    Walks each directory in EMBED_DIRS recursively. A markdown file is kept
    when its name does not start with "_" and its frontmatter has a ``type``
    other than "source" or "musing" (a missing type is also excluded).
    """
    excluded_types = ("source", "musing", None)
    found: list[Path] = []

    for top in (repo_root / name for name in EMBED_DIRS):
        if not top.is_dir():
            continue
        found.extend(
            md
            for md in top.rglob("*.md")
            if not md.name.startswith("_")
            and (fm := _parse_frontmatter(md))
            and fm.get("type") not in excluded_types
        )
    return found
|
||||
|
||||
|
||||
def build_reverse_link_index(claims: list[Path]) -> dict[str, set[Path]]:
    """Map each referenced target name to the set of files that link to it.

    Every claim's outgoing edge targets are collected; for each target name
    the claim is recorded as an incoming link. Keys are the target strings
    exactly as returned by _get_edge_targets.
    """
    incoming: dict[str, set[Path]] = {}
    for source in claims:
        for target in _get_edge_targets(source):
            incoming.setdefault(target, set()).add(source)
    return incoming
|
||||
|
||||
|
||||
def find_orphans(claims: list[Path], incoming: dict[str, set[Path]],
                 repo_root: Path = None) -> list[Path]:
    """Return the claims that no other file links to.

    A claim counts as linked when any of its name variants appears in
    ``incoming`` with at least one source other than the claim itself
    (self-links are ignored). Input order is preserved.
    """
    def _is_linked(claim: Path) -> bool:
        for name in _claim_name_variants(claim, repo_root):
            if incoming.get(name, set()) - {claim}:
                return True
        return False

    return [claim for claim in claims if not _is_linked(claim)]
|
||||
|
||||
|
||||
def sort_orphans_by_domain(orphans: list[Path], repo_root: Path) -> list[Path]:
|
||||
"""Sort orphans by domain priority (diversity first, internet-finance last)."""
|
||||
def domain_key(path: Path) -> tuple[int, str]:
|
||||
rel = path.relative_to(repo_root)
|
||||
parts = rel.parts
|
||||
domain = ""
|
||||
if len(parts) >= 2 and parts[0] in ("domains", "entities", "decisions"):
|
||||
domain = parts[1]
|
||||
elif parts[0] == "foundations" and len(parts) >= 2:
|
||||
domain = parts[1]
|
||||
elif parts[0] == "core":
|
||||
domain = "core"
|
||||
|
||||
try:
|
||||
priority = DOMAIN_PRIORITY.index(domain)
|
||||
except ValueError:
|
||||
# Unknown domain goes before internet-finance but after known ones
|
||||
priority = len(DOMAIN_PRIORITY) - 1
|
||||
|
||||
return (priority, path.stem)
|
||||
|
||||
return sorted(orphans, key=domain_key)
|
||||
|
||||
|
||||
# ─── Qdrant Search ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _get_api_key() -> str:
    """Return the OpenRouter API key.

    The key file ``SECRETS_DIR/openrouter-key`` takes precedence when it
    exists; otherwise the OPENROUTER_API_KEY environment variable is used.
    Logs an error and exits the process with status 1 when neither yields a key.
    """
    key_file = SECRETS_DIR / "openrouter-key"
    if key_file.exists():
        return key_file.read_text().strip()

    env_key = os.environ.get("OPENROUTER_API_KEY", "")
    if not env_key:
        logger.error("No OpenRouter API key found")
        sys.exit(1)
    return env_key
|
||||
|
||||
|
||||
def make_point_id(rel_path: str) -> str:
    """Derive a deterministic point ID from a repo-relative path.

    The ID is the hex MD5 digest of the path; it has to stay in step with
    the ID scheme used by embed-claims.py.
    """
    digest = hashlib.md5()
    digest.update(rel_path.encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def get_vector_from_qdrant(rel_path: str) -> list[float] | None:
    """Fetch the stored embedding vector for a claim from Qdrant.

    The point is looked up by the ID derived from ``rel_path``. Returns the
    vector, or None when the point is missing, has no vector, or the request
    fails (failures are logged as warnings).
    """
    lookup = {"ids": [make_point_id(rel_path)], "with_vector": True}
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points",
        data=json.dumps(lookup).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            points = json.loads(resp.read()).get("result", [])
            first = points[0] if points else None
            if first and first.get("vector"):
                return first["vector"]
    except Exception as e:
        logger.warning("Qdrant point lookup failed for %s: %s", rel_path, e)
    return None
|
||||
|
||||
|
||||
def search_neighbors(vector: list[float], exclude_path: str,
                     threshold: float, limit: int) -> list[dict]:
    """Query Qdrant for the nearest neighbours of ``vector``.

    Only hits scoring at least ``threshold`` are requested, and the point
    whose ``claim_path`` payload equals ``exclude_path`` is filtered out.
    At most ``limit`` hits are returned; any failure is logged and yields [].
    """
    query = {
        "vector": vector,
        # Ask for a few extra hits so later filtering still leaves `limit`.
        "limit": limit + 5,
        "with_payload": True,
        "score_threshold": threshold,
        "filter": {
            "must_not": [{"key": "claim_path", "match": {"value": exclude_path}}]
        },
    }
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/search",
        data=json.dumps(query).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            response = json.loads(resp.read())
            return response.get("result", [])[:limit]
    except Exception as e:
        logger.warning("Qdrant search failed: %s", e)
        return []
|
||||
|
||||
|
||||
# ─── Haiku Edge Classification ───────────────────────────────────────────────
|
||||
|
||||
|
||||
CLASSIFY_PROMPT = """You are classifying the relationship between two knowledge claims.
|
||||
|
||||
CLAIM A (the orphan — needs to be connected):
|
||||
Title: {orphan_title}
|
||||
Body: {orphan_body}
|
||||
|
||||
CLAIM B (the neighbor — already connected in the knowledge graph):
|
||||
Title: {neighbor_title}
|
||||
Body: {neighbor_body}
|
||||
|
||||
What is the relationship FROM Claim B TO Claim A?
|
||||
|
||||
Options:
|
||||
- "supports" — Claim B provides evidence, reasoning, or examples that strengthen Claim A
|
||||
- "challenges" — Claim B contradicts, undermines, or provides counter-evidence to Claim A. NOTE: "challenges" is underused — if one claim says X works and another says X fails, or they propose incompatible mechanisms, that IS a challenge. Use it.
|
||||
- "related" — Claims are topically connected but neither supports nor challenges the other. This is the WEAKEST edge — prefer supports/challenges when the relationship has directionality.
|
||||
|
||||
Respond with EXACTLY this JSON format, nothing else:
|
||||
{{"edge_type": "supports|challenges|related", "confidence": 0.0-1.0, "reason": "one sentence explanation"}}
|
||||
"""
|
||||
|
||||
|
||||
def classify_edge(orphan_title: str, orphan_body: str,
                  neighbor_title: str, neighbor_body: str,
                  api_key: str) -> dict:
    """Use Haiku to classify the edge type between two claims.

    Sends both claims (bodies truncated to 500 characters) to the model via
    OpenRouter and parses its JSON answer.

    Returns {"edge_type": str, "confidence": float, "reason": str}.
    ``edge_type`` is always one of "supports", "challenges" or "related":
      * an unrecognised value from the model is replaced by "related",
      * "supports"/"challenges" below HAIKU_CONFIDENCE_FLOOR become "related".
    Falls back to a "related" result with confidence 0.5 on any failure
    (network error, malformed response, non-numeric confidence).
    """
    default = {"edge_type": "related", "confidence": 0.5, "reason": "classification failed"}
    allowed_types = ("supports", "challenges", "related")

    prompt = CLASSIFY_PROMPT.format(
        orphan_title=orphan_title,
        orphan_body=orphan_body[:500],
        neighbor_title=neighbor_title,
        neighbor_body=neighbor_body[:500],
    )

    payload = json.dumps({
        "model": "anthropic/claude-3.5-haiku",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 200,
        "temperature": 0.3,
    }).encode()

    req = urllib.request.Request(
        "https://openrouter.ai/api/v1/chat/completions",
        data=payload,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        content = data["choices"][0]["message"]["content"].strip()

        # Parse JSON from response (handle markdown code blocks)
        if content.startswith("```"):
            content = content.split("\n", 1)[-1].rsplit("```", 1)[0].strip()

        result = json.loads(content)
        edge_type = str(result.get("edge_type", "related")).strip().lower()
        confidence = float(result.get("confidence", 0.5))

        # The edge type becomes a frontmatter key downstream, so never let an
        # arbitrary model-generated string through.
        if edge_type not in allowed_types:
            logger.warning("Haiku returned unknown edge_type %r, using 'related'", edge_type)
            edge_type = "related"

        # Enforce confidence floor for supports/challenges
        if edge_type in ("supports", "challenges") and confidence < HAIKU_CONFIDENCE_FLOOR:
            edge_type = "related"

        return {
            "edge_type": edge_type,
            "confidence": confidence,
            "reason": result.get("reason", ""),
        }
    except Exception as e:
        logger.warning("Haiku classification failed: %s", e)
        return default
|
||||
|
||||
|
||||
# ─── YAML Frontmatter Editing ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _count_reweave_edges(path: Path) -> int:
    """Return how many entries the file's ``reweave_edges`` frontmatter list holds.

    Yields 0 when the file has no parseable frontmatter, no such field, or
    the field is not a list.
    """
    fm = _parse_frontmatter(path)
    edges = fm.get("reweave_edges") if fm else None
    return len(edges) if isinstance(edges, list) else 0
|
||||
|
||||
|
||||
def write_edge(neighbor_path: Path, orphan_title: str, edge_type: str,
               date_str: str, dry_run: bool = False) -> bool:
    """Write a reweave edge on the neighbor's frontmatter.

    Adds to both the edge_type list (related/supports/challenges) and
    the parallel reweave_edges list for provenance tracking.

    Uses ruamel.yaml for round-trip YAML preservation; when ruamel.yaml is
    not installed the regex-based fallback is used instead.

    Args:
        neighbor_path: File whose frontmatter receives the edge.
        orphan_title: Value appended to the edge list.
        edge_type: Name of the frontmatter list to append to.
        date_str: Date recorded in the reweave_edges provenance entry.
        dry_run: When True nothing is written, but the return value still
            reports whether an edge would have been added.

    Returns:
        True when the edge was (or would be) added. False when the per-file
        cap is reached, the file cannot be read, has no complete frontmatter,
        its frontmatter is not a mapping, or the edge already exists.
    """
    # Check per-file cap
    if _count_reweave_edges(neighbor_path) >= PER_FILE_EDGE_CAP:
        logger.info(" Skip %s — per-file edge cap (%d) reached", neighbor_path.name, PER_FILE_EDGE_CAP)
        return False

    try:
        text = neighbor_path.read_text(errors="replace")
    except Exception as e:
        logger.warning(" Cannot read %s: %s", neighbor_path, e)
        return False

    if not text.startswith("---"):
        logger.warning(" No frontmatter in %s", neighbor_path.name)
        return False

    end = text.find("\n---", 3)
    if end == -1:
        return False

    fm_text = text[3:end]
    body_text = text[end:] # includes the closing ---

    # Try ruamel.yaml for round-trip editing
    # NOTE(review): only ImportError is handled below; a YAML parse error
    # raised by ry.load() propagates to the caller — confirm that is intended.
    try:
        from ruamel.yaml import YAML
        ry = YAML()
        ry.preserve_quotes = True
        ry.width = 4096 # prevent line wrapping

        import io
        fm = ry.load(fm_text)
        if not isinstance(fm, dict):
            return False

        # Add to edge_type list (related/supports/challenges)
        # Clean value only — provenance tracked in reweave_edges (Ganymede: comment-in-string bug)
        # A missing field becomes an empty list; a scalar value is wrapped in a list.
        if edge_type not in fm:
            fm[edge_type] = []
        elif not isinstance(fm[edge_type], list):
            fm[edge_type] = [fm[edge_type]]

        # Check for duplicate (case-insensitive, within this edge_type list only)
        existing = [str(v).strip().lower() for v in fm[edge_type] if v]
        if orphan_title.strip().lower() in existing:
            logger.info(" Skip duplicate edge: %s → %s", neighbor_path.name, orphan_title)
            return False

        fm[edge_type].append(orphan_title)

        # Add to reweave_edges with provenance (edge_type + date for audit trail)
        # Entry format: "<title>|<edge_type>|<date>"
        if "reweave_edges" not in fm:
            fm["reweave_edges"] = []
        elif not isinstance(fm["reweave_edges"], list):
            fm["reweave_edges"] = [fm["reweave_edges"]]
        fm["reweave_edges"].append(f"{orphan_title}|{edge_type}|{date_str}")

        # Serialize back
        buf = io.StringIO()
        ry.dump(fm, buf)
        # Drop trailing newlines: body_text already begins with "\n---".
        new_fm = buf.getvalue().rstrip("\n")

        new_text = f"---\n{new_fm}{body_text}"

        if not dry_run:
            neighbor_path.write_text(new_text)
        return True

    except ImportError:
        # Fallback: regex-based editing (no ruamel.yaml installed)
        logger.info(" ruamel.yaml not available, using regex fallback")
        return _write_edge_regex(neighbor_path, fm_text, body_text, orphan_title,
                                 edge_type, date_str, dry_run)
|
||||
|
||||
|
||||
def _append_block_list_item(fm_text: str, field: str, item_line: str) -> str | None:
    """Append ``item_line`` to the block-style YAML list under ``field:``.

    The new item reuses the indentation of the list's existing items, and is
    placed after the last item (including any indented continuation lines).
    Returns the updated frontmatter text, or None when no ``field:`` header
    line (a key with nothing after the colon) exists.
    """
    lines = fm_text.split("\n")
    header_re = re.compile(rf"^{re.escape(field)}:\s*$")
    for idx, line in enumerate(lines):
        if not header_re.match(line):
            continue
        end = idx + 1
        indent = None
        while end < len(lines):
            current = lines[end]
            stripped = current.lstrip()
            if stripped.startswith("- ") or stripped == "-":
                if indent is None:
                    indent = current[: len(current) - len(stripped)]
            elif not current[:1].isspace():
                # Blank line or a new top-level key: the list has ended.
                break
            end += 1
        lines.insert(end, f"{indent or ''}{item_line}")
        return "\n".join(lines)
    return None


def _write_edge_regex(neighbor_path: Path, fm_text: str, body_text: str,
                      orphan_title: str, edge_type: str, date_str: str,
                      dry_run: bool) -> bool:
    """Fallback: add edge via regex when ruamel.yaml is unavailable.

    Args:
        neighbor_path: File to rewrite.
        fm_text: Frontmatter text between the opening ``---`` and the closing
            delimiter (a leading newline is tolerated).
        body_text: Remainder of the file, starting with "\\n---".
        orphan_title: Value to add to the ``edge_type`` list.
        edge_type: Name of the frontmatter list to extend.
        date_str: Date recorded in the reweave_edges provenance entry.
        dry_run: When True the file is not written.

    Returns:
        True when the edge was (or would be) added. False when the title is
        already listed anywhere in the frontmatter, or when ``edge_type``
        exists in a form this fallback cannot edit safely (inline list or
        scalar value).

    Raises:
        ImportError: if ``lib.frontmatter`` cannot be imported.
    """
    # Strip leading newline from fm_text (text[3:end] includes \n after ---)
    fm_text = fm_text.lstrip("\n")

    # Check for duplicate before writing
    existing_re = re.compile(
        rf'^\s*-\s*["\']?{re.escape(orphan_title)}["\']?\s*$',
        re.MULTILINE | re.IGNORECASE,
    )
    if existing_re.search(fm_text):
        logger.info(" Skip duplicate edge (regex): %s → %s", neighbor_path.name, orphan_title)
        return False

    from lib.frontmatter import _yaml_quote
    entry_line = f'- {_yaml_quote(orphan_title)}'
    rw_line = f'- {_yaml_quote(orphan_title + "|" + edge_type + "|" + date_str)}'

    field_key = re.escape(edge_type)
    updated = _append_block_list_item(fm_text, edge_type, entry_line)
    if updated is not None:
        # Multi-line list exists — item appended with matching indentation
        fm_text = updated
    elif re.search(rf"^{field_key}:\s*\[", fm_text, re.MULTILINE):
        # Inline list — skip, too complex for regex
        logger.warning(" Inline list format for %s in %s, skipping", edge_type, neighbor_path.name)
        return False
    elif re.search(rf"^{field_key}:\s*\S", fm_text, re.MULTILINE):
        # Scalar value — appending a second "edge_type:" key would create a
        # duplicate mapping key, so leave the file untouched.
        logger.warning(" Scalar value for %s in %s, skipping", edge_type, neighbor_path.name)
        return False
    else:
        # Field doesn't exist — add at end of frontmatter
        fm_text = fm_text.rstrip("\n") + f"\n{edge_type}:\n{entry_line}"

    # Add reweave_edges field
    updated = _append_block_list_item(fm_text, "reweave_edges", rw_line)
    if updated is not None:
        fm_text = updated
    elif re.search(r"^reweave_edges:", fm_text, re.MULTILINE):
        # Present but not a block list: the edge is still written, but say
        # that provenance could not be recorded instead of dropping it silently.
        logger.warning(" reweave_edges in %s is not a block list, provenance not recorded",
                       neighbor_path.name)
    else:
        fm_text = fm_text.rstrip("\n") + f"\nreweave_edges:\n{rw_line}"

    new_text = f"---\n{fm_text}{body_text}"

    if not dry_run:
        neighbor_path.write_text(new_text)
    return True
|
||||
|
||||
|
||||
# ─── Git + PR ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def create_branch(repo_root: Path, branch_name: str) -> bool:
    """Check out a brand-new `branch_name` based on a freshly fetched origin/main.

    Any leftover local or remote branch of the same name (e.g. from an earlier
    failed run) is deleted first, then main is fetched and hard-reset to
    origin/main before branching, so the new branch never starts from stale
    state. (Ship: this removes the stale-base problem behind superset assertion
    failures and force-with-lease races, cutting the reweave merge failure rate
    from ~75% to near-zero.)

    Returns True when the branch was created and checked out, False when
    refreshing main or creating the branch failed.
    """
    workdir = str(repo_root)

    # Best-effort removal of a stale local branch; a missing branch is not an error.
    subprocess.run(["git", "branch", "-D", branch_name],
                   cwd=workdir, capture_output=True)

    # Best-effort removal of a stale remote branch (only possible with a token).
    secret_path = SECRETS_DIR / "forgejo-admin-token"
    if secret_path.exists():
        secret = secret_path.read_text().strip()
        remote = f"http://teleo:{secret}@localhost:3000/teleo/teleo-codex.git"
        subprocess.run(["git", "push", remote, "--delete", branch_name],
                       cwd=workdir, capture_output=True)

    # Bring local main in line with origin/main so the branch base matches
    # the main HEAD that _merge_reweave_pr will read at merge time.
    # Each entry is (command, timeout in seconds or None for no limit).
    freshen_steps = (
        (["git", "fetch", "origin", "main"], 30),
        (["git", "checkout", "main"], None),
        (["git", "reset", "--hard", "origin/main"], None),
    )
    try:
        for argv, limit in freshen_steps:
            subprocess.run(argv, cwd=workdir, check=True,
                           capture_output=True, timeout=limit)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
        logger.error("Failed to freshen to origin/main: %s", exc)
        return False

    try:
        subprocess.run(["git", "checkout", "-b", branch_name],
                       cwd=workdir, check=True, capture_output=True)
    except subprocess.CalledProcessError as exc:
        logger.error("Failed to create branch %s: %s", branch_name, exc.stderr.decode())
        return False
    return True
|
||||
|
||||
|
||||
def _scrub_git_stderr(err: subprocess.CalledProcessError, secret: str = "") -> str:
    """Return a failed git command's stderr as text, with `secret` masked out."""
    raw = err.stderr or b""
    text = raw.decode(errors="replace") if isinstance(raw, bytes) else str(raw)
    if secret:
        text = text.replace(secret, "***")
    return text.strip()


def commit_and_push(repo_root: Path, branch_name: str, modified_files: list[Path],
                    orphan_count: int) -> bool:
    """Stage the modified files, commit them, and push the branch to Forgejo.

    Args:
        repo_root: Working tree in which the git commands run.
        branch_name: Branch to push.
        modified_files: Files to stage; nothing else is added to the commit.
        orphan_count: Number of orphans connected, used in the commit message.

    Returns:
        True when the commit was created and pushed. False when nothing was
        staged, the token file is missing, or any git step failed. Git failures
        are logged and reported as False instead of raising, so the caller's
        failure branch is actually reached.
    """
    cwd = str(repo_root)

    # Stage only modified files
    try:
        for f in modified_files:
            subprocess.run(["git", "add", str(f)], cwd=cwd,
                           check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        logger.error("git add failed: %s", _scrub_git_stderr(e))
        return False

    # Check if anything staged
    result = subprocess.run(["git", "diff", "--cached", "--name-only"],
                            cwd=cwd, capture_output=True, text=True)
    if not result.stdout.strip():
        logger.info("No files staged — nothing to commit")
        return False

    msg = (
        f"reweave: connect {orphan_count} orphan claims via vector similarity\n\n"
        f"Threshold: {DEFAULT_THRESHOLD}, Haiku classification, {len(modified_files)} files modified.\n\n"
        f"Pentagon-Agent: Epimetheus <0144398e-4ed3-4fe2-95a3-3d72e1abf887>"
    )
    try:
        subprocess.run(["git", "commit", "-m", msg], cwd=cwd,
                       check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        logger.error("git commit failed: %s", _scrub_git_stderr(e))
        return False

    # Push — inject token
    token_file = SECRETS_DIR / "forgejo-admin-token"
    if not token_file.exists():
        logger.error("No Forgejo token found at %s", token_file)
        return False
    token = token_file.read_text().strip()
    push_url = f"http://teleo:{token}@localhost:3000/teleo/teleo-codex.git"

    try:
        subprocess.run(["git", "push", "-u", push_url, branch_name],
                       cwd=cwd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # Never log `e` itself here: its string form embeds the command line,
        # which contains the token-bearing push URL.
        logger.error("git push of %s failed: %s", branch_name, _scrub_git_stderr(e, token))
        return False
    return True
|
||||
|
||||
|
||||
def create_pr(branch_name: str, orphan_count: int, summary_lines: list[str]) -> str | None:
    """Open a Forgejo pull request from `branch_name` into main for this reweave batch.

    The PR body lists at most the first 30 entries of `summary_lines`.

    Returns the `html_url` field of the API response (an empty string if the
    response lacks it), or None when the token file is missing or the request
    fails for any reason.
    """
    secret_path = SECRETS_DIR / "forgejo-admin-token"
    if not secret_path.exists():
        return None
    api_token = secret_path.read_text().strip()

    bullets = [f"- {entry}" for entry in summary_lines[:30]]
    edge_list = "\n".join(bullets)
    description = "".join([
        "## Orphan Reweave\n\n",
        f"Connected **{orphan_count}** orphan claims to the knowledge graph ",
        f"via vector similarity (threshold {DEFAULT_THRESHOLD}) + Haiku edge classification.\n\n",
        f"### Edges Added\n{edge_list}\n\n",
        "### Review Guide\n",
        "- Each edge has a `# reweave:YYYY-MM-DD` comment — strip after review\n",
        "- `reweave_edges` field tracks automated edges for tooling (graph_expand weights them 0.75x)\n",
        "- Upgrade `related` → `supports`/`challenges` where you have better judgment\n",
        "- Delete any edges that don't make sense\n\n",
        "Pentagon-Agent: Epimetheus",
    ])

    pr_fields = {
        "title": f"reweave: connect {orphan_count} orphan claims",
        "body": description,
        "head": branch_name,
        "base": "main",
    }
    request = urllib.request.Request(
        f"{FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls",
        data=json.dumps(pr_fields).encode(),
        headers={
            "Authorization": f"token {api_token}",
            "Content-Type": "application/json",
        },
    )

    # Any failure (network, HTTP status, malformed JSON) is logged and reported as None.
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return json.loads(response.read()).get("html_url", "")
    except Exception as exc:
        logger.error("PR creation failed: %s", exc)
        return None
|
||||
|
||||
|
||||
# ─── Worktree Lock ───────────────────────────────────────────────────────────
|
||||
|
||||
_lock_fd = None  # Module-level so the open handle (and with it the flock) outlives acquire_lock()


def acquire_lock(lock_path: Path, timeout: int = 30) -> bool:
    """Try to take an exclusive, non-blocking flock on `lock_path`.

    The file is opened without truncation, so a failed attempt never wipes the
    "reweave:<pid>" marker written by the process that currently holds the
    lock. The marker is only rewritten once the lock has been obtained.

    Args:
        lock_path: Lock file location; parent directories are created if needed.
        timeout: Accepted for interface compatibility but currently unused —
            the attempt is a single non-blocking try.

    Returns:
        True if the lock was acquired (the handle is kept in `_lock_fd`),
        False if the file could not be opened or is locked by someone else.
    """
    global _lock_fd
    import fcntl

    handle = None
    try:
        lock_path.parent.mkdir(parents=True, exist_ok=True)
        # "a+" creates the file if missing but, unlike "w", leaves existing
        # content alone until we actually own the lock.
        handle = open(lock_path, "a+")
        fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        handle.truncate(0)
        handle.write(f"reweave:{os.getpid()}\n")
        handle.flush()
    except OSError:
        if handle is not None:
            handle.close()  # don't leak the descriptor of a failed attempt
        logger.warning("Could not acquire worktree lock at %s — another process has it", lock_path)
        _lock_fd = None
        return False

    _lock_fd = handle
    return True


def release_lock(lock_path: Path):
    """Release the worktree lock taken by acquire_lock(), if this process holds it.

    Does nothing when no lock is held, so a lock file owned by another process
    is never deleted. Otherwise the lock file is removed while the lock is
    still held, then the lock is dropped and the handle closed. All steps are
    best-effort and never raise.
    """
    global _lock_fd
    import fcntl

    handle = _lock_fd
    _lock_fd = None
    if handle is None:
        return

    # Unlink before unlocking: deleting the path after the lock is dropped
    # could remove a file that another process has just locked.
    try:
        lock_path.unlink(missing_ok=True)
    except OSError:
        pass

    try:
        fcntl.flock(handle, fcntl.LOCK_UN)
    except OSError:
        pass  # closing the handle below releases the lock anyway
    finally:
        try:
            handle.close()
        except OSError:
            pass
|
||||
|
||||
|
||||
# ─── Main ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the orphan reweave.

    Parses the CLI options, scans the repo for claims, finds orphans, and for
    each orphan in the capped batch looks up its vector, searches for
    neighbors, and classifies an edge per surviving neighbor. With --dry-run
    the proposed edges are only logged. Otherwise the worktree lock is taken,
    a dated `reweave/<date>` branch is created, the edges are written into the
    neighbor files, and the result is committed, pushed, and opened as a PR.

    Rebinds the module globals REPO_DIR (when --repo-dir is given) and
    DEFAULT_THRESHOLD (always, to the --threshold value). Exits with status 1
    if the lock or the branch cannot be obtained.
    """
    global REPO_DIR, DEFAULT_THRESHOLD

    parser = argparse.ArgumentParser(description="Orphan Reweave — connect isolated claims")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be connected without modifying files")
    parser.add_argument("--max-orphans", type=int, default=DEFAULT_MAX_ORPHANS,
                        help=f"Max orphans to process (default {DEFAULT_MAX_ORPHANS})")
    parser.add_argument("--max-neighbors", type=int, default=DEFAULT_MAX_NEIGHBORS,
                        help=f"Max neighbors per orphan (default {DEFAULT_MAX_NEIGHBORS})")
    parser.add_argument("--threshold", type=float, default=DEFAULT_THRESHOLD,
                        help=f"Minimum cosine similarity (default {DEFAULT_THRESHOLD})")
    parser.add_argument("--repo-dir", type=str, default=None,
                        help="Override repo directory")
    args = parser.parse_args()

    if args.repo_dir:
        REPO_DIR = Path(args.repo_dir)
    # The commit message and PR body read DEFAULT_THRESHOLD, so keep it in
    # sync with the value actually used for this run.
    DEFAULT_THRESHOLD = args.threshold

    date_str = datetime.date.today().isoformat()
    branch_name = f"reweave/{date_str}"

    logger.info("=== Orphan Reweave ===")
    logger.info("Repo: %s", REPO_DIR)
    logger.info("Threshold: %.2f, Max orphans: %d, Max neighbors: %d",
                args.threshold, args.max_orphans, args.max_neighbors)
    if args.dry_run:
        logger.info("DRY RUN — no files will be modified")

    # Step 1: Find all claims and build reverse-link index
    logger.info("Step 1: Scanning KB for claims...")
    claims = find_all_claims(REPO_DIR)
    logger.info(" Found %d knowledge files", len(claims))

    logger.info("Step 2: Building reverse-link index...")
    incoming = build_reverse_link_index(claims)

    logger.info("Step 3: Finding orphans...")
    orphans = find_orphans(claims, incoming, REPO_DIR)
    orphans = sort_orphans_by_domain(orphans, REPO_DIR)
    logger.info(" Found %d orphans (%.1f%% of %d claims)",
                len(orphans), 100 * len(orphans) / max(len(claims), 1), len(claims))

    if not orphans:
        logger.info("No orphans found — KB is fully connected!")
        return

    # Cap to max_orphans
    batch = orphans[:args.max_orphans]
    logger.info(" Processing batch of %d orphans", len(batch))

    # Step 4: For each orphan, find neighbors and classify edges
    api_key = _get_api_key()
    edges_to_write: list[dict] = []  # {neighbor_path, orphan_title, edge_type, reason, score}
    skipped_no_vector = 0
    skipped_no_neighbors = 0
    skipped_entity_pair = 0
    skipped_same_source = 0

    for i, orphan_path in enumerate(batch):
        rel_path = str(orphan_path.relative_to(REPO_DIR))
        fm = _parse_frontmatter(orphan_path)
        # Title preference: frontmatter "name", then "title", then the file stem.
        orphan_title = fm.get("name", fm.get("title", orphan_path.stem.replace("-", " "))) if fm else orphan_path.stem
        orphan_body = _get_body(orphan_path)

        logger.info("[%d/%d] %s", i + 1, len(batch), orphan_title[:80])

        # Get vector from Qdrant
        vector = get_vector_from_qdrant(rel_path)
        if not vector:
            logger.info(" No vector in Qdrant — skipping (not embedded yet)")
            skipped_no_vector += 1
            continue

        # Find neighbors
        hits = search_neighbors(vector, rel_path, args.threshold, args.max_neighbors)
        if not hits:
            logger.info(" No neighbors above threshold %.2f", args.threshold)
            skipped_no_neighbors += 1
            continue

        for hit in hits:
            payload = hit.get("payload", {})
            neighbor_rel = payload.get("claim_path", "")
            neighbor_title = payload.get("claim_title", "")
            score = hit.get("score", 0)

            if not neighbor_rel:
                continue

            neighbor_path = REPO_DIR / neighbor_rel
            if not neighbor_path.exists():
                logger.info(" Neighbor %s not found on disk — skipping", neighbor_rel)
                continue

            # Entity-to-entity exclusion: entities need different vocabulary
            # (founded_by, competes_with, etc.) not supports/challenges
            if _is_entity(orphan_path) and _is_entity(neighbor_path):
                logger.info(" Skip entity-entity pair: %s ↔ %s", orphan_path.name, neighbor_path.name)
                skipped_entity_pair += 1
                continue

            # Same-source exclusion: N claims from one paper all "supporting" each other
            # inflates graph density without adding information
            if _same_source(orphan_path, neighbor_path):
                logger.info(" Skip same-source pair: %s ↔ %s", orphan_path.name, neighbor_path.name)
                skipped_same_source += 1
                continue

            neighbor_body = _get_body(neighbor_path)

            # Classify with Haiku
            result = classify_edge(orphan_title, orphan_body,
                                   neighbor_title, neighbor_body, api_key)
            edge_type = result["edge_type"]
            confidence = result["confidence"]
            reason = result["reason"]

            logger.info(" → %s (%.3f) %s [%.2f]: %s",
                        neighbor_title[:50], score, edge_type, confidence, reason[:60])

            # The edge is recorded on the neighbor, pointing at the orphan.
            edges_to_write.append({
                "neighbor_path": neighbor_path,
                "neighbor_rel": neighbor_rel,
                "neighbor_title": neighbor_title,
                "orphan_title": str(orphan_title),
                "orphan_rel": rel_path,
                "edge_type": edge_type,
                "score": score,
                "confidence": confidence,
                "reason": reason,
            })

        # Rate limit courtesy
        if not args.dry_run and i < len(batch) - 1:
            time.sleep(0.3)

    logger.info("\n=== Summary ===")
    logger.info("Orphans processed: %d", len(batch))
    logger.info("Edges to write: %d", len(edges_to_write))
    logger.info("Skipped (no vector): %d", skipped_no_vector)
    logger.info("Skipped (no neighbors): %d", skipped_no_neighbors)
    logger.info("Skipped (entity-entity): %d", skipped_entity_pair)
    logger.info("Skipped (same-source): %d", skipped_same_source)

    if not edges_to_write:
        logger.info("Nothing to write.")
        return

    if args.dry_run:
        logger.info("\n=== Dry Run — Edges That Would Be Written ===")
        for e in edges_to_write:
            logger.info(" %s → [%s] → %s (score=%.3f, conf=%.2f)",
                        e["neighbor_title"][:40], e["edge_type"],
                        e["orphan_title"][:40], e["score"], e["confidence"])
        return

    # Step 5: Acquire lock, create branch, write edges, commit, push, create PR
    lock_path = REPO_DIR.parent / ".main-worktree.lock"
    if not acquire_lock(lock_path):
        logger.error("Cannot acquire worktree lock — aborting")
        sys.exit(1)

    try:
        # Create branch
        if not create_branch(REPO_DIR, branch_name):
            logger.error("Failed to create branch %s", branch_name)
            # SystemExit still passes through the finally block below.
            sys.exit(1)

        # Write edges
        modified_files = set()
        written = 0
        summary_lines = []

        for e in edges_to_write:
            ok = write_edge(
                e["neighbor_path"], e["orphan_title"], e["edge_type"],
                date_str, dry_run=False,
            )
            if ok:
                modified_files.add(e["neighbor_path"])
                written += 1
                summary_lines.append(
                    f"`{e['neighbor_title'][:50]}` → [{e['edge_type']}] → "
                    f"`{e['orphan_title'][:50]}` (score={e['score']:.3f})"
                )

        logger.info("Wrote %d edges across %d files", written, len(modified_files))

        if not modified_files:
            logger.info("No edges written — cleaning up branch")
            subprocess.run(["git", "checkout", "main"], cwd=str(REPO_DIR),
                           capture_output=True)
            subprocess.run(["git", "branch", "-d", branch_name], cwd=str(REPO_DIR),
                           capture_output=True)
            return

        # Commit and push
        # Counts distinct orphan titles that have an edge to any modified neighbor file.
        orphan_count = len(set(e["orphan_title"] for e in edges_to_write if e["neighbor_path"] in modified_files))
        if commit_and_push(REPO_DIR, branch_name, list(modified_files), orphan_count):
            logger.info("Pushed branch %s", branch_name)

            # Create PR
            pr_url = create_pr(branch_name, orphan_count, summary_lines)
            if pr_url:
                logger.info("PR created: %s", pr_url)
            else:
                logger.warning("PR creation failed — branch is pushed, create manually")
        else:
            logger.error("Commit/push failed")

    finally:
        # Always return to main — even on exception (Ganymede: branch cleanup)
        try:
            subprocess.run(["git", "checkout", "main"], cwd=str(REPO_DIR),
                           capture_output=True)
        except Exception:
            pass
        release_lock(lock_path)

    logger.info("Done.")
|
||||
|
||||
|
||||
# Script entry point: run the reweave only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
259
scripts/audit-wiki-links.py
Normal file
259
scripts/audit-wiki-links.py
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Audit wiki-links across the teleo-codex knowledge base.
|
||||
|
||||
Crawls domains/, foundations/, core/, decisions/ for [[wiki-links]].
|
||||
Resolves each link against known claim files, entity files, and _map files.
|
||||
Reports dead links, orphaned claims, and link counts.
|
||||
|
||||
Output: JSON to stdout with dead links, orphans, and per-file link counts.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
CODEX_ROOT = Path(os.environ.get("CODEX_ROOT", "/opt/teleo-eval/workspaces/main"))
|
||||
CLAIM_DIRS = ["domains", "foundations", "core", "decisions"]
|
||||
ENTITY_DIR = "entities"
|
||||
|
||||
WIKI_LINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
|
||||
def slugify(title: str) -> str:
    """Turn a wiki-link title into the kebab-case slug used for filenames.

    The title is trimmed, lowercased and NFKD-normalized; characters that are
    not word characters, whitespace or hyphens are dropped; runs of whitespace
    and underscores become a single hyphen; repeated hyphens are collapsed and
    leading/trailing hyphens removed.
    """
    slug = unicodedata.normalize("NFKD", title.strip().lower())
    # Applied in order: drop punctuation, hyphenate separators, collapse hyphens.
    substitutions = (
        (r"[^\w\s-]", ""),
        (r"[\s_]+", "-"),
        (r"-+", "-"),
    )
    for pattern, replacement in substitutions:
        slug = re.sub(pattern, replacement, slug)
    return slug.strip("-")
|
||||
|
||||
|
||||
def build_index(codex: Path) -> dict:
    """Build the lookup table of everything a wiki-link may resolve to.

    Keys are lowercased file stems and, for claim, entity and maps files, also
    the slugified stem. Values are file paths relative to `codex`. When two
    files share a key, the one indexed later wins.
    """
    lookup: dict = {}

    def _register(root: Path, with_slug: bool) -> None:
        # Index every markdown file below `root`; a missing directory is skipped.
        if not root.exists():
            return
        for path in root.rglob("*.md"):
            target = str(path.relative_to(codex))
            lookup[path.stem.lower()] = target
            if with_slug:
                lookup[slugify(path.stem)] = target

    # Claim directories, then entities, then maps/ (MOC-style overview docs).
    for subdir in (*CLAIM_DIRS, ENTITY_DIR, "maps"):
        _register(codex / subdir, with_slug=True)

    # Top-level docs that might be link targets (stem key only).
    for filename in ("overview.md", "livingip-overview.md"):
        doc = codex / filename
        if doc.exists():
            lookup[doc.stem.lower()] = str(doc.relative_to(codex))

    # agents/ beliefs and positions are sometimes linked (stem key only).
    _register(codex / "agents", with_slug=False)

    return lookup
|
||||
|
||||
|
||||
def resolve_link(link_text: str, index: dict, source_dir: str) -> str | None:
|
||||
"""Try to resolve a wiki-link target. Returns file path or None."""
|
||||
text = link_text.strip()
|
||||
|
||||
# Special case: [[_map]] resolves to _map.md in the same domain directory
|
||||
if text == "_map":
|
||||
parts = source_dir.split("/")
|
||||
if len(parts) >= 2:
|
||||
candidate = f"{parts[0]}/{parts[1]}/_map.md"
|
||||
if (CODEX_ROOT / candidate).exists():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
# Path-style references like [[domains/health/_map]]
|
||||
if "/" in text:
|
||||
candidate = text.rstrip("/")
|
||||
if not candidate.endswith(".md"):
|
||||
candidate += ".md"
|
||||
if (CODEX_ROOT / candidate).exists():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
# Try exact stem match (lowercased)
|
||||
key = text.lower()
|
||||
if key in index:
|
||||
return index[key]
|
||||
|
||||
# Try slugified version
|
||||
slug = slugify(text)
|
||||
if slug in index:
|
||||
return index[slug]
|
||||
|
||||
# Try with common variations
|
||||
for variant in [
|
||||
slug.replace("metadaos", "metadao"),
|
||||
slug.replace("ais", "ai"),
|
||||
]:
|
||||
if variant in index:
|
||||
return index[variant]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def audit(codex: Path) -> dict:
|
||||
"""Run the full wiki-link audit."""
|
||||
index = build_index(codex)
|
||||
|
||||
dead_links = [] # {file, link, line_number}
|
||||
link_counts = {} # file -> {outbound: N, targets: []}
|
||||
all_targets = set() # files that are linked TO
|
||||
all_files = set() # all claim/foundation files
|
||||
|
||||
# Scan all markdown files in claim directories
|
||||
for claim_dir in CLAIM_DIRS:
|
||||
d = codex / claim_dir
|
||||
if not d.exists():
|
||||
continue
|
||||
for md in d.rglob("*.md"):
|
||||
rel = str(md.relative_to(codex))
|
||||
all_files.add(rel)
|
||||
source_dir = str(md.parent.relative_to(codex))
|
||||
|
||||
try:
|
||||
content = md.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
links_in_file = []
|
||||
for i, line in enumerate(content.split("\n"), 1):
|
||||
for match in WIKI_LINK_RE.finditer(line):
|
||||
link_text = match.group(1)
|
||||
# Skip links with | (display text aliases) - take the target part
|
||||
if "|" in link_text:
|
||||
link_text = link_text.split("|")[0].strip()
|
||||
|
||||
resolved = resolve_link(link_text, index, source_dir)
|
||||
if resolved:
|
||||
all_targets.add(resolved)
|
||||
links_in_file.append(resolved)
|
||||
else:
|
||||
dead_links.append({
|
||||
"file": rel,
|
||||
"link": link_text,
|
||||
"line": i,
|
||||
})
|
||||
|
||||
link_counts[rel] = {
|
||||
"outbound": len(links_in_file),
|
||||
"targets": links_in_file,
|
||||
}
|
||||
|
||||
# Find orphaned claims (no inbound links AND no outbound links)
|
||||
files_with_outbound = {f for f, c in link_counts.items() if c["outbound"] > 0}
|
||||
orphaned = sorted(
|
||||
f for f in all_files
|
||||
if f not in all_targets
|
||||
and f not in files_with_outbound
|
||||
and not f.endswith("_map.md") # MOC files are structural, not orphans
|
||||
)
|
||||
|
||||
# Compute inbound link counts
|
||||
inbound_counts = {}
|
||||
for f, c in link_counts.items():
|
||||
for target in c["targets"]:
|
||||
inbound_counts[target] = inbound_counts.get(target, 0) + 1
|
||||
|
||||
# Claims with high outbound (good connectivity)
|
||||
high_connectivity = sorted(
|
||||
[(f, c["outbound"]) for f, c in link_counts.items() if c["outbound"] >= 3],
|
||||
key=lambda x: -x[1],
|
||||
)
|
||||
|
||||
# Summary stats
|
||||
total_links = sum(c["outbound"] for c in link_counts.values())
|
||||
files_with_links = sum(1 for c in link_counts.values() if c["outbound"] > 0)
|
||||
|
||||
# Domain breakdown of dead links
|
||||
dead_by_domain = {}
|
||||
for dl in dead_links:
|
||||
parts = dl["file"].split("/")
|
||||
domain = parts[1] if len(parts) >= 3 else parts[0]
|
||||
dead_by_domain[domain] = dead_by_domain.get(domain, 0) + 1
|
||||
|
||||
# Domain breakdown of orphans
|
||||
orphan_by_domain = {}
|
||||
for o in orphaned:
|
||||
parts = o.split("/")
|
||||
domain = parts[1] if len(parts) >= 3 else parts[0]
|
||||
orphan_by_domain[domain] = orphan_by_domain.get(domain, 0) + 1
|
||||
|
||||
return {
|
||||
"summary": {
|
||||
"total_files": len(all_files),
|
||||
"total_links": total_links,
|
||||
"files_with_links": files_with_links,
|
||||
"files_without_links": len(all_files) - files_with_links,
|
||||
"dead_link_count": len(dead_links),
|
||||
"orphan_count": len(orphaned),
|
||||
"avg_links_per_file": round(total_links / max(len(all_files), 1), 2),
|
||||
"high_connectivity_count": len(high_connectivity),
|
||||
},
|
||||
"dead_links": dead_links,
|
||||
"dead_by_domain": dict(sorted(dead_by_domain.items(), key=lambda x: -x[1])),
|
||||
"orphaned": orphaned,
|
||||
"orphan_by_domain": dict(sorted(orphan_by_domain.items(), key=lambda x: -x[1])),
|
||||
"high_connectivity": [{"file": f, "outbound_links": n} for f, n in high_connectivity[:20]],
|
||||
"inbound_top20": sorted(
|
||||
[{"file": f, "inbound_links": n} for f, n in inbound_counts.items()],
|
||||
key=lambda x: -x["inbound_links"],
|
||||
)[:20],
|
||||
}
|
||||
|
||||
|
||||
# Script mode: audit the codex given as first CLI argument (or CODEX_ROOT),
# emit the full result as JSON on stdout and a short summary on stderr.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        codex = Path(sys.argv[1])
    else:
        codex = CODEX_ROOT
    result = audit(codex)
    json.dump(result, sys.stdout, indent=2)
    print()

    # Human-readable summary goes to stderr so stdout stays pure JSON.
    s = result["summary"]
    pct_linked = 100 * s["files_with_links"] // max(s["total_files"], 1)
    report = [
        "\n=== Wiki-Link Audit ===",
        f"Files scanned: {s['total_files']}",
        f"Total links: {s['total_links']}",
        f"Files with links: {s['files_with_links']} ({pct_linked}%)",
        f"Dead links: {s['dead_link_count']}",
        f"Orphaned claims: {s['orphan_count']}",
        f"Avg links/file: {s['avg_links_per_file']}",
        f"High connectivity (≥3 links): {s['high_connectivity_count']}",
    ]
    for row in report:
        print(row, file=sys.stderr)
|
||||
197
scripts/backfill-ci.py
Normal file
197
scripts/backfill-ci.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env python3
|
||||
# ONE-SHOT BACKFILL — do not cron. Idempotent but resets all counts. (Ganymede)
|
||||
"""Backfill CI contributor attribution from git history.
|
||||
|
||||
Walks all merged PRs, reclassifies as knowledge/pipeline,
|
||||
re-derives contributor counts with corrected logic.
|
||||
|
||||
Initial claims (sourced by m3taversal, extracted by agents) get
|
||||
sourcer credit to m3taversal.
|
||||
|
||||
Usage:
|
||||
python3 backfill-ci.py [--dry-run]
|
||||
|
||||
Pentagon-Agent: Epimetheus <3D35839A-7722-4740-B93D-51157F7D5E70>
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"
|
||||
REPO_DIR = "/opt/teleo-eval/workspaces/main"
|
||||
|
||||
# Static principal map
|
||||
PRINCIPAL_MAP = {
|
||||
"rio": "m3taversal",
|
||||
"leo": "m3taversal",
|
||||
"clay": "m3taversal",
|
||||
"theseus": "m3taversal",
|
||||
"vida": "m3taversal",
|
||||
"astra": "m3taversal",
|
||||
}
|
||||
|
||||
KNOWLEDGE_PREFIXES = ("domains/", "core/", "foundations/", "decisions/")
|
||||
PIPELINE_PREFIXES = ("inbox/", "entities/", "agents/")
|
||||
|
||||
|
||||
def classify_pr(conn, pr_number):
    """Label a merged PR as "knowledge" or "pipeline" using its DB record.

    Decision order: a PR without branch info, or on a `pipeline/` or
    `entity-batch/` branch, is "pipeline". Otherwise, if git can list the files
    changed against origin/main, the PR is "knowledge" when any of them lies
    under a knowledge prefix. If git yields nothing, the branch-name prefix
    decides.
    """
    record = conn.execute("SELECT branch FROM prs WHERE number=?", (pr_number,)).fetchone()
    branch = record[0] if record else None
    if not branch:
        return "pipeline"  # No branch info = infrastructure

    # Pipeline branches are obvious
    if branch.startswith(("pipeline/", "entity-batch/")):
        return "pipeline"

    # Ask git which files the branch changed; any failure falls through.
    try:
        diff = subprocess.run(
            ["git", "diff", "--name-only", f"origin/main...origin/{branch}"],
            cwd=REPO_DIR, capture_output=True, text=True, timeout=10,
        )
        changed = diff.stdout.strip()
        if diff.returncode == 0 and changed:
            touches_knowledge = any(
                path.startswith(KNOWLEDGE_PREFIXES) for path in changed.split("\n")
            )
            return "knowledge" if touches_knowledge else "pipeline"
    except Exception:
        pass

    # Fallback: agent extraction branches are usually knowledge
    agent_prefixes = ("extract/", "rio/", "leo/", "clay/", "theseus/", "vida/", "astra/")
    return "knowledge" if branch.startswith(agent_prefixes) else "pipeline"
|
||||
|
||||
|
||||
def get_pr_agent(conn, pr_number):
    """Return the lowercase agent name for a PR, or None if it cannot be determined.

    The stored `agent` column wins when set. Otherwise the agent is inferred
    from the branch prefix (`<agent>/...`), with `extract/` branches attributed
    to "epimetheus".
    """
    record = conn.execute("SELECT agent, branch FROM prs WHERE number=?", (pr_number,)).fetchone()
    if not record:
        return None

    agent, branch = record[0], record[1]
    if agent:
        return agent.lower()
    if not branch:
        return None

    # Extract agent from branch prefix
    known_agents = ("rio", "leo", "clay", "theseus", "vida", "astra",
                    "epimetheus", "ganymede", "argus")
    for name in known_agents:
        if branch.startswith(f"{name}/"):
            return name

    if branch.startswith("extract/"):
        return "epimetheus"  # Pipeline extraction
    return None
|
||||
|
||||
|
||||
def main():
    """Re-derive contributor attribution from all merged PRs.

    Resets the contributor role counts, classifies every merged PR as
    knowledge or pipeline, credits the extracting agent and m3taversal (as
    sourcer) for each knowledge PR, writes the results back, and prints a
    per-contributor and per-principal CI report. With --dry-run nothing is
    written; the final reports then show the database as it currently is.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    # CI role weights, shared by both reports below so they cannot drift apart.
    weights = {"sourcer": 0.15, "extractor": 0.05, "challenger": 0.35, "synthesizer": 0.25, "reviewer": 0.20}

    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        # Step 1: Reset all role counts
        if not args.dry_run:
            conn.execute("""UPDATE contributors SET
                sourcer_count=0, extractor_count=0, challenger_count=0,
                synthesizer_count=0, reviewer_count=0, claims_merged=0""")
            print("Reset all contributor counts to zero")

        # Step 2: Walk all merged PRs
        merged_prs = conn.execute(
            "SELECT number, branch, agent, origin FROM prs WHERE status='merged' ORDER BY number"
        ).fetchall()
        print(f"Processing {len(merged_prs)} merged PRs")

        knowledge_count = 0
        pipeline_count = 0
        attributed = {}  # handle → {role → count}

        for pr in merged_prs:
            pr_num = pr["number"]
            commit_type = classify_pr(conn, pr_num)

            if commit_type == "pipeline":
                pipeline_count += 1
                if not args.dry_run:
                    conn.execute("UPDATE prs SET commit_type='pipeline' WHERE number=?", (pr_num,))
                continue

            knowledge_count += 1
            if not args.dry_run:
                conn.execute("UPDATE prs SET commit_type='knowledge' WHERE number=?", (pr_num,))

            agent = get_pr_agent(conn, pr_num)

            # Credit the extracting agent
            if agent:
                attributed.setdefault(agent, {"extractor": 0, "sourcer": 0, "claims": 0})
                attributed[agent]["extractor"] += 1
                attributed[agent]["claims"] += 1

            # Credit m3taversal as sourcer for all knowledge PRs
            # (he directed the work, provided sources, seeded the KB)
            attributed.setdefault("m3taversal", {"extractor": 0, "sourcer": 0, "claims": 0})
            attributed["m3taversal"]["sourcer"] += 1
            attributed["m3taversal"]["claims"] += 1

        print(f"\nClassified: {knowledge_count} knowledge, {pipeline_count} pipeline")

        # Step 3: Update contributor table
        print("\n=== Attribution results ===")
        for handle, counts in sorted(attributed.items(), key=lambda x: x[1]["claims"], reverse=True):
            principal = PRINCIPAL_MAP.get(handle)
            p = f" -> {principal}" if principal else ""
            print(f" {handle}{p}: sourcer={counts['sourcer']}, extractor={counts['extractor']}, claims={counts['claims']}")

            if not args.dry_run:
                # Upsert
                existing = conn.execute("SELECT handle FROM contributors WHERE handle=?", (handle,)).fetchone()
                if existing:
                    conn.execute("""UPDATE contributors SET
                        sourcer_count=?, extractor_count=?, claims_merged=?,
                        principal=?
                        WHERE handle=?""",
                                 (counts["sourcer"], counts["extractor"], counts["claims"],
                                  principal, handle))
                else:
                    conn.execute("""INSERT INTO contributors
                        (handle, sourcer_count, extractor_count, claims_merged, principal,
                        first_contribution, last_contribution, tier)
                        VALUES (?, ?, ?, ?, ?, date('now'), date('now'), 'contributor')""",
                                 (handle, counts["sourcer"], counts["extractor"], counts["claims"], principal))

        if not args.dry_run:
            conn.commit()
            print("\nBackfill committed to DB")

        # Verify
        print("\n=== Post-backfill CI ===")
        for r in conn.execute("""SELECT handle, principal, sourcer_count, extractor_count,
                challenger_count, synthesizer_count, reviewer_count, claims_merged
                FROM contributors ORDER BY claims_merged DESC LIMIT 10""").fetchall():
            ci = sum((r[f"{role}_count"] or 0) * w for role, w in weights.items())
            p = f" -> {r['principal']}" if r['principal'] else ""
            print(f" {r['handle']}{p}: claims={r['claims_merged']}, src={r['sourcer_count']}, ext={r['extractor_count']}, CI={round(ci, 2)}")

        # Principal roll-up
        print("\n=== Principal roll-up ===")
        rows = conn.execute("""SELECT
                COALESCE(principal, handle) as who,
                SUM(sourcer_count) as src, SUM(extractor_count) as ext,
                SUM(challenger_count) as chl, SUM(synthesizer_count) as syn,
                SUM(reviewer_count) as rev, SUM(claims_merged) as claims
                FROM contributors GROUP BY who ORDER BY claims DESC""").fetchall()
        # Role name -> column alias used in the roll-up query above.
        rollup_columns = {"sourcer": "src", "extractor": "ext", "challenger": "chl",
                          "synthesizer": "syn", "reviewer": "rev"}
        for r in rows:
            # SUM() is NULL when every value in the group is NULL; count that as 0
            # instead of failing on None * float.
            ci = sum((r[rollup_columns[role]] or 0) * w for role, w in weights.items())
            print(f" {r['who']}: claims={r['claims']}, CI={round(ci, 2)}")
    finally:
        conn.close()
|
||||
|
||||
|
||||
# Script entry point: run the backfill only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
60
scripts/backfill-descriptions.py
Normal file
60
scripts/backfill-descriptions.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Backfill description column for merged PRs that have no description.
|
||||
|
||||
Reads claim frontmatter from branches via git show (works on bare repos).
|
||||
"""
|
||||
import sqlite3
|
||||
import yaml
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Directory in which git commands are run; override with the REPO_DIR environment variable.
REPO = os.environ.get("REPO_DIR", "/opt/teleo-eval/workspaces/teleo-codex.git")
# SQLite database file opened by main(); override with the PIPELINE_DB environment variable.
DB = os.environ.get("PIPELINE_DB", "/opt/teleo-eval/pipeline/pipeline.db")
|
||||
|
||||
|
||||
def extract_description(branch):
    """Build a PR description from claim frontmatter found on a branch.

    Lists the files that differ between ``origin/main`` and
    ``origin/<branch>``, keeps Markdown files whose path contains
    ``domains/``, and reads the ``description`` key from the YAML
    frontmatter of at most the first ten of them (only the first 2000
    characters of each file are inspected).

    Args:
        branch: Branch name without the ``origin/`` prefix.

    Returns:
        Up to five descriptions joined with ``" | "``, or ``None`` when the
        branch is empty or no usable description is found.
    """
    # Local import: this script does not import subprocess at module level.
    import subprocess

    if not branch:
        # NULL/empty branch in the DB: nothing to inspect.
        return None

    def _git(*git_args):
        # Arguments are passed as a list (no shell), so branch or file names
        # containing shell metacharacters cannot inject commands.
        try:
            proc = subprocess.run(
                ["git", *git_args],
                cwd=REPO,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                errors="replace",
            )
        except OSError:
            # Missing repo directory or git binary: best-effort, like the
            # original shell pipeline which simply produced no output.
            return ""
        return proc.stdout

    listing = _git("diff", "--name-only", f"origin/main...origin/{branch}")
    changed = [
        f for f in listing.strip().split("\n")
        if f.endswith(".md") and "domains/" in f
    ]

    descs = []
    for fpath in changed[:10]:
        content = _git("show", f"origin/{branch}:{fpath}")[:2000]
        if not content.startswith("---"):
            continue
        end = content.find("---", 3)
        if end < 0:
            continue
        try:
            fm = yaml.safe_load(content[3:end])
        except Exception:
            # Deliberately broad: malformed frontmatter of any kind just
            # means this file contributes no description.
            continue
        if not isinstance(fm, dict):
            continue
        raw = fm.get("description")
        if not isinstance(raw, str):
            # Missing, or not a plain string (e.g. list/number): skip instead
            # of crashing on .strip().
            continue
        d = raw.strip().strip('"')
        if len(d) > 10:
            descs.append(d)

    return " | ".join(descs[:5]) if descs else None
|
||||
|
||||
|
||||
def main():
    """Fill in ``prs.description`` for merged PRs that have none.

    Selects merged PRs whose description is NULL or empty, derives a
    description for each via ``extract_description(branch)``, and writes it
    back. Commits every 50 updates and once more at the end. The connection
    is closed even if an error interrupts the run.
    """
    conn = sqlite3.connect(DB)
    try:
        rows = conn.execute(
            "SELECT number, branch FROM prs WHERE status='merged' AND (description IS NULL OR description='')"
        ).fetchall()
        print(f"PRs needing descriptions: {len(rows)}")

        updated = 0
        for pr_num, branch in rows:
            desc = extract_description(branch)
            if not desc:
                continue
            conn.execute("UPDATE prs SET description=? WHERE number=?", (desc, pr_num))
            updated += 1
            # Commit in batches so an interrupted run keeps most of its work.
            if updated % 50 == 0:
                conn.commit()
                print(f" ...{updated} updated")

        conn.commit()
    finally:
        conn.close()
    print(f"Done. Updated {updated}/{len(rows)} PRs with descriptions.")
|
||||
|
||||
|
||||
# Run the backfill only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
193
scripts/backfill-domains.py
Normal file
193
scripts/backfill-domains.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
#!/usr/bin/env python3
|
||||
# ONE-SHOT BACKFILL — do not cron. Idempotent.
|
||||
"""Reclassify PRs with domain='general' or NULL using file paths from diffs.
|
||||
|
||||
The extraction prompt defaults to 'general' when it can't determine domain.
|
||||
This script re-derives domains from actual file paths in merged PR diffs,
|
||||
which are more reliable than extraction-time heuristics.
|
||||
|
||||
Usage:
|
||||
python3 backfill-domains.py [--dry-run]
|
||||
|
||||
Pentagon-Agent: Epimetheus <0144398E-4ED3-4FE2-95A3-3D72E1ABF887>
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sqlite3
|
||||
import subprocess
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
# SQLite database file opened by main().
DB_PATH = "/opt/teleo-eval/pipeline/pipeline.db"
# Working directory for the git commands issued by get_diff_files().
REPO_DIR = "/opt/teleo-eval/workspaces/main"
|
||||
|
||||
# Canonical domains — must match lib/domains.py DOMAIN_AGENT_MAP.
# Only directory names in this set are counted by detect_domain_from_paths().
VALID_DOMAINS = frozenset({
    "internet-finance", "entertainment", "health", "ai-alignment",
    "space-development", "mechanisms", "living-capital", "living-agents",
    "teleohumanity", "grand-strategy", "critical-systems",
    "collective-intelligence", "teleological-economics", "cultural-dynamics",
})
|
||||
|
||||
# Agent → primary domain (same as lib/domains.py).
# Keys are lowercase; detect_domain_from_agent() lowercases its input before lookup.
AGENT_PRIMARY_DOMAIN = {
    "rio": "internet-finance",
    "clay": "entertainment",
    "theseus": "ai-alignment",
    "vida": "health",
    "astra": "space-development",
    "leo": "grand-strategy",
}
|
||||
|
||||
|
||||
def detect_domain_from_paths(file_paths: list[str]) -> str | None:
|
||||
"""Detect domain from file paths in a diff.
|
||||
|
||||
Checks domains/, entities/, core/, foundations/ directory structure.
|
||||
Returns the most frequently referenced valid domain, or None.
|
||||
"""
|
||||
domain_counts: Counter = Counter()
|
||||
for path in file_paths:
|
||||
for prefix in ("domains/", "entities/"):
|
||||
if path.startswith(prefix):
|
||||
parts = path.split("/")
|
||||
if len(parts) >= 2:
|
||||
d = parts[1]
|
||||
if d in VALID_DOMAINS:
|
||||
domain_counts[d] += 1
|
||||
break
|
||||
else:
|
||||
for prefix in ("core/", "foundations/"):
|
||||
if path.startswith(prefix):
|
||||
parts = path.split("/")
|
||||
if len(parts) >= 2:
|
||||
d = parts[1]
|
||||
if d in VALID_DOMAINS:
|
||||
domain_counts[d] += 1
|
||||
break
|
||||
|
||||
if domain_counts:
|
||||
return domain_counts.most_common(1)[0][0]
|
||||
return None
|
||||
|
||||
|
||||
def get_diff_files(pr_number: int, branch: str) -> list[str]:
    """Return the file paths changed by a PR, as reported by git.

    Two strategies are tried in order, both run inside ``REPO_DIR`` with a
    10-second timeout per git call:

    1. ``git diff --name-only origin/main...origin/<branch>``.
    2. If that fails *or lists no files* (the branch is gone, or it is
       already fully contained in main so the three-dot diff is empty),
       find the most recent merge commit whose message mentions
       ``#<pr_number>`` and diff it against its first parent.

    Args:
        pr_number: PR number searched for in merge commit messages.
        branch: Branch name without the ``origin/`` prefix.

    Returns:
        The changed paths, or an empty list when neither strategy yields any.
    """

    def _git_lines(git_args: list[str]) -> list[str] | None:
        # Non-empty stripped stdout lines of a git command, or None when the
        # command could not be run, timed out, or exited non-zero.
        try:
            proc = subprocess.run(
                ["git", *git_args],
                capture_output=True, text=True, timeout=10,
                cwd=REPO_DIR,
            )
        except (subprocess.TimeoutExpired, OSError):
            return None
        if proc.returncode != 0:
            return None
        return [ln.strip() for ln in proc.stdout.strip().split("\n") if ln.strip()]

    files = _git_lines(["diff", "--name-only", f"origin/main...origin/{branch}"])
    if files:
        return files

    # Fallback: locate the merge commit for this PR. The pattern requires a
    # non-digit (or end of line) after the number so that "#12" does not
    # match the merge commit of PR "#123".
    merge_shas = _git_lines([
        "log", "--merges", "-E",
        f"--grep=#{pr_number}([^0-9]|$)",
        "--format=%H", "-1",
    ])
    if merge_shas:
        merge_sha = merge_shas[0]
        merged_files = _git_lines(["diff", "--name-only", f"{merge_sha}~1..{merge_sha}"])
        if merged_files is not None:
            return merged_files

    return []
|
||||
|
||||
|
||||
def detect_domain_from_agent(agent: str | None) -> str | None:
|
||||
"""Infer domain from agent's primary domain."""
|
||||
if agent:
|
||||
return AGENT_PRIMARY_DOMAIN.get(agent.lower())
|
||||
return None
|
||||
|
||||
|
||||
def main():
    """Reclassify merged PRs whose domain is NULL or 'general'.

    For each such PR the new domain is taken from the changed file paths
    (``get_diff_files`` + ``detect_domain_from_paths``) and, failing that,
    from the agent's primary domain (``detect_domain_from_agent``). With
    ``--dry-run`` the changes are reported but nothing is written. The
    database connection is closed even if an error interrupts the run.
    """
    parser = argparse.ArgumentParser(description="Backfill domain for 'general'/NULL PRs")
    parser.add_argument("--dry-run", action="store_true", help="Print changes without applying")
    args = parser.parse_args()

    reclassified = 0
    unchanged = 0
    distribution: Counter = Counter()
    log_entries = []

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row

        # Find PRs with missing or 'general' domain
        rows = conn.execute(
            """SELECT number, branch, domain, agent FROM prs
               WHERE status = 'merged'
               AND (domain IS NULL OR domain = 'general')
               ORDER BY number"""
        ).fetchall()

        print(f"Found {len(rows)} merged PRs with domain=NULL or 'general'")

        for row in rows:
            pr_num = row["number"]
            branch = row["branch"]
            old_domain = row["domain"] or "NULL"
            agent = row["agent"]

            new_domain = None

            # Strategy 1: File paths from diff
            if branch:
                files = get_diff_files(pr_num, branch)
                new_domain = detect_domain_from_paths(files)

            # Strategy 2: Agent's primary domain
            if new_domain is None:
                new_domain = detect_domain_from_agent(agent)

            if new_domain and new_domain != old_domain:
                log_entries.append(f"PR #{pr_num}: {old_domain} → {new_domain} (agent={agent}, branch={branch})")
                distribution[new_domain] += 1

                if not args.dry_run:
                    conn.execute(
                        "UPDATE prs SET domain = ? WHERE number = ?",
                        (new_domain, pr_num),
                    )
                # Counted in dry-run mode too, so the report shows what
                # would change.
                reclassified += 1
            else:
                unchanged += 1

        if not args.dry_run and reclassified > 0:
            conn.commit()
    finally:
        conn.close()

    # Report
    print(f"\nReclassified: {reclassified}")
    print(f"Unchanged (still general): {unchanged}")
    print("\nDistribution of reclassified PRs:")
    for domain, count in distribution.most_common():
        print(f" {domain}: {count}")

    if log_entries:
        print(f"\nDetailed log ({len(log_entries)} changes):")
        for entry in log_entries:
            print(f" {entry}")

    if args.dry_run:
        print("\n[DRY RUN — no changes applied]")
|
||||
|
||||
|
||||
# Run the backfill only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue