Auto: ops/research-session.sh | 1 file changed, 304 insertions(+)

2026-03-10 11:42:54 +00:00 · 2026-03-10 11:42:54 +00:00 · e2703a276c
commit e2703a276c
parent 7c1bfe8eef
1 changed files with 304 additions and 0 deletions
--- a/ops/research-session.sh
+++ b/ops/research-session.sh
@ -0,0 +1,304 @@
+#!/bin/bash
+# Run a self-directed research session for one agent.
+# Usage: ./research-session.sh <agent-name>
+# Example: ./research-session.sh clay
+#
+# What it does:
+#   1. Pulls latest tweets from the agent's network accounts (X API)
+#   2. Gives Claude the agent's identity, beliefs, and current KB state
+#   3. Agent picks a research direction and archives sources with notes
+#   4. Commits source archives to a branch, pushes, opens PR
+#   5. Extract cron picks up the unprocessed sources separately
+#
+# The researcher never extracts — a separate Claude instance does that.
+# This prevents motivated reasoning in extraction.
+
+set -euo pipefail
+
+AGENT="${1:?Usage: $0 <agent-name>}"
+REPO_DIR="/opt/teleo-eval/workspaces/research-${AGENT}"
+FORGEJO_URL="http://localhost:3000"
+FORGEJO_ADMIN_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token)
+AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token" 2>/dev/null || echo "$FORGEJO_ADMIN_TOKEN")
+TWITTER_API_KEY=$(cat /opt/teleo-eval/secrets/twitterapi-io-key)
+CLAUDE_BIN="/home/teleo/.local/bin/claude"
+LOG_DIR="/opt/teleo-eval/logs"
+LOG="$LOG_DIR/research-${AGENT}.log"
+LOCKFILE="/tmp/research-${AGENT}.lock"
+DATE=$(date +%Y-%m-%d)
+BRANCH="${AGENT}/research-${DATE}"
+RAW_DIR="/opt/teleo-eval/research-raw/${AGENT}"
+
+log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; }
+
+# --- Lock (prevent concurrent sessions for same agent) ---
+# Create the lockfile atomically: under noclobber the redirect fails when the
+# file already exists, so two sessions starting at the same moment cannot both
+# pass a separate "does it exist?" check. The subshell keeps noclobber from
+# leaking into the rest of the script; $$ is still the script's own PID there.
+if ! ( set -o noclobber; echo $$ > "$LOCKFILE" ) 2>/dev/null; then
+    # An unreadable or empty lockfile is treated as stale rather than fatal.
+    pid=$(cat "$LOCKFILE" 2>/dev/null || true)
+    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+        log "SKIP: research session already running for $AGENT (pid $pid)"
+        exit 0
+    fi
+    log "WARN: stale lockfile for $AGENT, removing"
+    rm -f "$LOCKFILE"
+    echo $$ > "$LOCKFILE"
+fi
+# Release the lock on every exit path.
+trap 'rm -f "$LOCKFILE"' EXIT
+
+log "=== Starting research session for $AGENT ==="
+
+# --- Ensure directories ---
+mkdir -p "$RAW_DIR" "$LOG_DIR"
+
+# --- Clone or update repo ---
+if [ ! -d "$REPO_DIR/.git" ]; then
+    log "Cloning repo for $AGENT research..."
+    git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" \
+        clone "${FORGEJO_URL}/teleo/teleo-codex.git" "$REPO_DIR" >> "$LOG" 2>&1
+fi
+
+cd "$REPO_DIR"
+git config credential.helper "!f() { echo username=m3taversal; echo password=$FORGEJO_ADMIN_TOKEN; }; f"
+git remote set-url origin "${FORGEJO_URL}/teleo/teleo-codex.git" 2>/dev/null || true
+git checkout main >> "$LOG" 2>&1
+git pull --rebase >> "$LOG" 2>&1
+
+# --- Map agent to domain ---
+# Print the knowledge-base domain for agent $1; return 1 for an unknown agent.
+domain_for_agent() {
+    case "$1" in
+        rio)     echo "internet-finance" ;;
+        clay)    echo "entertainment" ;;
+        theseus) echo "ai-alignment" ;;
+        vida)    echo "health" ;;
+        astra)   echo "space-development" ;;
+        leo)     echo "grand-strategy" ;;
+        *)       return 1 ;;
+    esac
+}
+
+if ! DOMAIN=$(domain_for_agent "$AGENT"); then
+    log "ERROR: Unknown agent $AGENT"
+    exit 1
+fi
+
+# --- Tweet formatting helpers ---
+# format_tweets FILE
+#   Print up to 20 tweets from the JSON file FILE, each as a
+#   "[date] (N likes) text" line, an indented URL line and a blank line.
+#   FILE is passed as an argument instead of being spliced into the Python
+#   source, so an unusual filename cannot change the code that runs.
+#   Exits non-zero when the file cannot be read or parsed (details go to the
+#   log), which lets the caller substitute a placeholder.
+format_tweets() {
+    python3 -c '
+import json, sys
+try:
+    with open(sys.argv[1]) as f:
+        d = json.load(f)
+    tweets = d.get("tweets", d.get("data", []))
+    # Tolerate the list being nested one level down as data.tweets.
+    # NOTE(review): not confirmed against the API response schema.
+    if isinstance(tweets, dict):
+        tweets = tweets.get("tweets", [])
+    for t in tweets[:20]:
+        text = t.get("text", "")[:500]
+        likes = t.get("likeCount", t.get("public_metrics", {}).get("like_count", 0))
+        date = t.get("createdAt", t.get("created_at", "unknown"))
+        url = t.get("twitterUrl", t.get("url", ""))
+        print(f"[{date}] ({likes} likes) {text}")
+        print(f"  URL: {url}")
+        print()
+except Exception as e:
+    print(f"Error reading {sys.argv[1]}: {e}", file=sys.stderr)
+    sys.exit(1)
+' "$1" 2>> "$LOG"
+}
+
+# append_tweets HEADER FILE
+#   Append a "--- HEADER ---" section with the formatted tweets from FILE to
+#   the global TWEET_DATA, or "(failed to parse)" when formatting fails.
+append_tweets() {
+    local header="$1" file="$2" body
+    body=$(format_tweets "$file") || body="(failed to parse)"
+    TWEET_DATA+=$'\n'"--- ${header} ---"$'\n'"${body}"
+}
+
+# --- Pull tweets from agent's network ---
+TWEET_DATA=""
+# Check if agent has a network file in the repo
+NETWORK_FILE="agents/${AGENT}/network.json"
+if [ ! -f "$NETWORK_FILE" ]; then
+    log "No network file at $NETWORK_FILE — agent will use KB context to decide what to research"
+else
+    log "Pulling tweets from ${AGENT}'s network..."
+    # One username per line for accounts in the "core" or "extended" tier.
+    # On failure, keep whatever was printed before the error and log it.
+    ACCOUNTS=$(python3 -c '
+import json, sys
+with open(sys.argv[1]) as f:
+    data = json.load(f)
+for acct in data.get("accounts", []):
+    if acct.get("tier") in ("core", "extended"):
+        name = acct.get("username")
+        if name:
+            print(name)
+' "$NETWORK_FILE" 2>> "$LOG") || log "WARN: could not fully read accounts from $NETWORK_FILE"
+
+    # Read usernames on fd 3 so nothing inside the loop can consume the list.
+    while IFS= read -r -u 3 USERNAME; do
+        USERNAME="${USERNAME#@}"
+        [ -n "$USERNAME" ] || continue
+        # The name becomes part of a file path and a URL: accept only the
+        # characters a handle can contain.
+        case "$USERNAME" in
+            *[!A-Za-z0-9_]*)
+                log "WARN: skipping invalid username '${USERNAME}' in $NETWORK_FILE"
+                continue
+                ;;
+        esac
+
+        OUTFILE="$RAW_DIR/${USERNAME}.json"
+        # Only pull if file doesn't exist or is older than 12 hours
+        if [ ! -f "$OUTFILE" ] || [ -n "$(find "$OUTFILE" -mmin +720 2>/dev/null)" ]; then
+            log "Pulling @${USERNAME}..."
+            # Download to a temp file and move it into place only on success.
+            # -f makes HTTP errors fail instead of saving the error body, which
+            # would otherwise be cached as "fresh" data for the next 12 hours.
+            # --max-time keeps a hung request from stalling the whole session.
+            TMPFILE="${OUTFILE}.tmp.$$"
+            if curl -sf --max-time 30 \
+                "https://api.twitterapi.io/twitter/user/last_tweets?userName=${USERNAME}" \
+                -H "X-API-Key: ${TWITTER_API_KEY}" \
+                -o "$TMPFILE" 2>/dev/null; then
+                mv -f "$TMPFILE" "$OUTFILE"
+            else
+                rm -f "$TMPFILE"
+                log "WARN: Failed to pull @${USERNAME}"
+                continue
+            fi
+            sleep 2  # Rate limit courtesy
+        fi
+        if [ -f "$OUTFILE" ]; then
+            append_tweets "@${USERNAME} tweets" "$OUTFILE"
+        fi
+    done 3<<< "$ACCOUNTS"
+fi
+
+# --- Also check for any raw JSON dumps in inbox-raw ---
+INBOX_RAW="/opt/teleo-eval/inbox-raw/${AGENT}"
+if [ -d "$INBOX_RAW" ]; then
+    FOUND_RAW=0
+    for RAWFILE in "$INBOX_RAW"/*.json; do
+        # An unmatched glob stays as the literal pattern; skip it.
+        [ -e "$RAWFILE" ] || continue
+        if [ "$FOUND_RAW" -eq 0 ]; then
+            log "Found raw dumps in $INBOX_RAW"
+            FOUND_RAW=1
+        fi
+        USERNAME=$(basename "$RAWFILE" .json)
+        append_tweets "@${USERNAME} tweets (from raw dump)" "$RAWFILE"
+    done
+fi
+
+# --- Create branch ---
+git branch -D "$BRANCH" 2>/dev/null || true
+git checkout -b "$BRANCH" >> "$LOG" 2>&1
+log "On branch $BRANCH"
+
+# --- Build the research prompt ---
+# Write tweet data to a temp file so Claude can read it
+TWEET_FILE="/tmp/research-tweets-${AGENT}.md"
+echo "$TWEET_DATA" > "$TWEET_FILE"
+
+RESEARCH_PROMPT="You are ${AGENT}, a Teleo knowledge base agent. Domain: ${DOMAIN}.
+
+## Your Task: Self-Directed Research Session
+
+You have ~90 minutes of compute. Use it wisely.
+
+### Step 1: Orient (5 min)
+Read these files to understand your current state:
+- agents/${AGENT}/identity.md (who you are)
+- agents/${AGENT}/beliefs.md (what you believe)
+- agents/${AGENT}/reasoning.md (how you think)
+- domains/${DOMAIN}/_map.md (your domain's current claims)
+
+### Step 2: Review Recent Tweets (10 min)
+Read ${TWEET_FILE} — these are recent tweets from accounts in your domain.
+Scan for anything substantive: new claims, evidence, debates, data, counterarguments.
+
+### Step 3: Pick ONE Research Direction (5 min)
+Based on your beliefs, your domain's gaps, and what you found in the tweets, pick ONE research direction that would most advance your domain understanding. Consider:
+- Gaps in your beliefs that need evidence
+- Claims in the KB that new evidence might challenge
+- Cross-domain connections flagged by other agents
+- New developments that change the landscape
+
+Write a brief note explaining your choice to: agents/${AGENT}/musings/research-${DATE}.md
+
+### Step 4: Archive Sources (60 min)
+For each relevant tweet/thread, create an archive file:
+
+Path: inbox/archive/YYYY-MM-DD-{author-handle}-{brief-slug}.md
+
+Use this frontmatter:
+---
+type: source
+title: \"Descriptive title\"
+author: \"Display Name (@handle)\"
+url: https://original-url
+date: YYYY-MM-DD
+domain: ${DOMAIN}
+secondary_domains: []
+format: tweet | thread
+status: unprocessed
+priority: high | medium | low
+tags: [topic1, topic2]
+---
+
+## Content
+[Full text of tweet/thread]
+
+## Agent Notes
+**Why this matters:** [1-2 sentences]
+**KB connections:** [Which existing claims relate?]
+**Extraction hints:** [What claims might an extractor pull?]
+**Context:** [Who is the author, what debate is this part of?]
+
+### Rules:
+- Archive EVERYTHING substantive, not just what supports your views
+- Set all sources to status: unprocessed (a DIFFERENT instance will extract)
+- Flag cross-domain sources with flagged_for_{agent}: [\"reason\"]
+- Do NOT extract claims yourself — write good notes so the extractor can
+- Check inbox/archive/ for duplicates before creating new archives
+- Aim for 5-15 source archives per session
+
+### Step 5: Commit and Push (5 min)
+Stage your archives and musing, commit with:
+  ${AGENT}: research session ${DATE} — {brief description of direction}
+
+  Pentagon-Agent: $(echo ${AGENT} | sed 's/./\U&/') <HEADLESS>
+
+Then stop. Do not open a PR — the script handles that."
+
+# --- Run Claude research session ---
+log "Starting Claude research session..."
+timeout 5400 "$CLAUDE_BIN" -p "$RESEARCH_PROMPT" \
+    --allowedTools 'Read,Write,Edit,Glob,Grep,Bash' \
+    --model sonnet \
+    --permission-mode bypassPermissions \
+    >> "$LOG" 2>&1 || {
+    log "WARN: Research session failed or timed out for $AGENT"
+    git checkout main >> "$LOG" 2>&1
+    exit 1
+}
+
+log "Claude session complete"
+
+# --- Check for changes ---
+CHANGED_FILES=$(git status --porcelain)
+if [ -z "$CHANGED_FILES" ]; then
+    log "No sources archived by $AGENT"
+    git checkout main >> "$LOG" 2>&1
+    exit 0
+fi
+
+# --- Stage and commit if Claude didn't already ---
+if ! git log --oneline -1 | grep -q "research session"; then
+    # Claude didn't commit — do it manually
+    git add inbox/archive/ agents/${AGENT}/musings/ 2>/dev/null || true
+
+    if git diff --cached --quiet; then
+        log "No valid changes to commit"
+        git checkout main >> "$LOG" 2>&1
+        exit 0
+    fi
+
+    AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')
+    SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || echo "0")
+    git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived
+
+Pentagon-Agent: ${AGENT_UPPER} <HEADLESS>" >> "$LOG" 2>&1
+fi
+
+# --- Push ---
+git push -u origin "$BRANCH" --force >> "$LOG" 2>&1
+log "Pushed $BRANCH"
+
+# --- Open PR ---
+PR_JSON=$(python3 -c "
+import json
+data = {
+    'title': '${AGENT}: research session ${DATE}',
+    'body': '## Self-Directed Research\\n\\nAutomated research session for ${AGENT} (${DOMAIN}).\\n\\nSources archived with status: unprocessed — extract cron will handle claim extraction separately.\\n\\nResearcher and extractor are different Claude instances to prevent motivated reasoning.',
+    'base': 'main',
+    'head': '${BRANCH}'
+}
+print(json.dumps(data))
+")
+
+PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
+    -H "Authorization: token $AGENT_TOKEN" \
+    -H "Content-Type: application/json" \
+    -d "$PR_JSON" 2>&1)
+
+PR_NUMBER=$(echo "$PR_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number','unknown'))" 2>/dev/null || echo "unknown")
+log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"
+
+# --- Back to main ---
+git checkout main >> "$LOG" 2>&1
+log "=== Research session complete for $AGENT ==="