#!/bin/bash
# Run a self-directed research session for one agent.
#
# Usage:   ./research-session.sh <agent>
# Example: ./research-session.sh clay
#
# What it does:
#   1. Pulls latest tweets from the agent's network accounts (X API)
#   2. Gives Claude the agent's identity, beliefs, and current KB state
#   3. Agent picks a research direction and archives sources with notes
#   4. Commits source archives to a branch, pushes, opens PR
#   5. Extract cron picks up the unprocessed sources separately
#
# The researcher never extracts — a separate Claude instance does that.
# This prevents motivated reasoning in extraction.

set -euo pipefail

AGENT="${1:?Usage: $0 <agent>}"
REPO_DIR="/opt/teleo-eval/workspaces/research-${AGENT}"
FORGEJO_URL="http://localhost:3000"
FORGEJO_ADMIN_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token)
# Fall back to the admin token when the agent has no dedicated Forgejo token.
AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token" 2>/dev/null || echo "$FORGEJO_ADMIN_TOKEN")
TWITTER_API_KEY=$(cat /opt/teleo-eval/secrets/twitterapi-io-key)
CLAUDE_BIN="/home/teleo/.local/bin/claude"
LOG_DIR="/opt/teleo-eval/logs"
LOG="$LOG_DIR/research-${AGENT}.log"
LOCKFILE="/tmp/research-${AGENT}.lock"
DATE=$(date +%Y-%m-%d)
BRANCH="${AGENT}/research-${DATE}"
RAW_DIR="/opt/teleo-eval/research-raw/${AGENT}"

# Append a timestamped line to this agent's log file.
log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; }

# --- Lock (prevent concurrent sessions for same agent) ---
if [ -f "$LOCKFILE" ]; then
  pid=$(cat "$LOCKFILE" 2>/dev/null)
  if kill -0 "$pid" 2>/dev/null; then
    log "SKIP: research session already running for $AGENT (pid $pid)"
    exit 0
  fi
  log "WARN: stale lockfile for $AGENT, removing"
  rm -f "$LOCKFILE"
fi
echo $$ > "$LOCKFILE"
trap 'rm -f "$LOCKFILE"' EXIT

log "=== Starting research session for $AGENT ==="

# --- Ensure directories ---
mkdir -p "$RAW_DIR" "$LOG_DIR"

# --- Clone or update repo ---
if [ ! -d "$REPO_DIR/.git" ]; then
  log "Cloning repo for $AGENT research..."
  git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" \
    clone "${FORGEJO_URL}/teleo/teleo-codex.git" "$REPO_DIR" >> "$LOG" 2>&1
fi
cd "$REPO_DIR"
git config credential.helper "!f() { echo username=m3taversal; echo password=$FORGEJO_ADMIN_TOKEN; }; f"
git remote set-url origin "${FORGEJO_URL}/teleo/teleo-codex.git" 2>/dev/null || true
git checkout main >> "$LOG" 2>&1
git pull --rebase >> "$LOG" 2>&1

# --- Map agent to domain ---
case "$AGENT" in
  rio)     DOMAIN="internet-finance" ;;
  clay)    DOMAIN="entertainment" ;;
  theseus) DOMAIN="ai-alignment" ;;
  vida)    DOMAIN="health" ;;
  astra)   DOMAIN="space-development" ;;
  leo)     DOMAIN="grand-strategy" ;;
  *) log "ERROR: Unknown agent $AGENT"; exit 1 ;;
esac

# --- Pull tweets from agent's network ---
# Check if agent has a network file in the repo
NETWORK_FILE="agents/${AGENT}/network.json"
if [ ! -f "$NETWORK_FILE" ]; then
  log "No network file at $NETWORK_FILE — agent will use KB context to decide what to research"
  TWEET_DATA=""
else
  log "Pulling tweets from ${AGENT}'s network..."
  # One username per line; only core/extended tier accounts are pulled.
  ACCOUNTS=$(python3 -c "
import json
with open('$NETWORK_FILE') as f:
    data = json.load(f)
for acct in data.get('accounts', []):
    if acct.get('tier') in ('core', 'extended'):
        print(acct['username'])
" 2>/dev/null || true)
  TWEET_DATA=""
  # Intentional word-splitting: ACCOUNTS is a newline-separated username list.
  for USERNAME in $ACCOUNTS; do
    OUTFILE="$RAW_DIR/${USERNAME}.json"
    # Only pull if file doesn't exist or is older than 12 hours
    if [ ! -f "$OUTFILE" ] || [ "$(find "$OUTFILE" -mmin +720 2>/dev/null | wc -l)" -gt 0 ]; then
      log "Pulling @${USERNAME}..."
      # -f: fail on HTTP errors so an API error page is never saved as tweets
      # and the WARN/continue fallback actually fires.
      curl -sf "https://api.twitterapi.io/twitter/user/last_tweets?userName=${USERNAME}" \
        -H "X-API-Key: ${TWITTER_API_KEY}" \
        -o "$OUTFILE" 2>/dev/null || {
        log "WARN: Failed to pull @${USERNAME}"
        continue
      }
      sleep 2  # Rate limit courtesy
    fi
    if [ -f "$OUTFILE" ]; then
      # The API response shape varies; each field has a fallback key.
      TWEET_DATA="${TWEET_DATA}

--- @${USERNAME} tweets ---
$(python3 -c "
import json, sys
try:
    d = json.load(open('$OUTFILE'))
    tweets = d.get('tweets', d.get('data', []))
    for t in tweets[:20]:
        text = t.get('text', '')[:500]
        likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
        date = t.get('createdAt', t.get('created_at', 'unknown'))
        url = t.get('twitterUrl', t.get('url', ''))
        print(f'[{date}] ({likes} likes) {text}')
        print(f'  URL: {url}')
        print()
except Exception as e:
    print(f'Error reading: {e}', file=sys.stderr)
" 2>/dev/null || echo "(failed to parse)")"
    fi
  done
fi

# --- Also check for any raw JSON dumps in inbox-raw ---
INBOX_RAW="/opt/teleo-eval/inbox-raw/${AGENT}"
if [ -d "$INBOX_RAW" ] && ls "$INBOX_RAW"/*.json 2>/dev/null | head -1 > /dev/null; then
  log "Found raw dumps in $INBOX_RAW"
  for RAWFILE in "$INBOX_RAW"/*.json; do
    USERNAME=$(basename "$RAWFILE" .json)
    TWEET_DATA="${TWEET_DATA}

--- @${USERNAME} tweets (from raw dump) ---
$(python3 -c "
import json, sys
try:
    d = json.load(open('$RAWFILE'))
    tweets = d.get('tweets', d.get('data', []))
    for t in tweets[:20]:
        text = t.get('text', '')[:500]
        likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
        date = t.get('createdAt', t.get('created_at', 'unknown'))
        url = t.get('twitterUrl', t.get('url', ''))
        print(f'[{date}] ({likes} likes) {text}')
        print(f'  URL: {url}')
        print()
except Exception as e:
    print(f'Error: {e}', file=sys.stderr)
" 2>/dev/null || echo "(failed to parse)")"
  done
fi

# --- Create branch ---
# Recreate the day's branch from scratch; a rerun replaces the earlier attempt.
git branch -D "$BRANCH" 2>/dev/null || true
git checkout -b "$BRANCH" >> "$LOG" 2>&1
log "On branch $BRANCH"

# --- Build the research prompt ---
# Write tweet data to a temp file so Claude can read it
TWEET_FILE="/tmp/research-tweets-${AGENT}.md"
echo "$TWEET_DATA" > "$TWEET_FILE"

RESEARCH_PROMPT="You are ${AGENT}, a Teleo knowledge base agent. Domain: ${DOMAIN}.

## Your Task: Self-Directed Research Session

You have ~90 minutes of compute. Use it wisely.

### Step 1: Orient (5 min)
Read these files to understand your current state:
- agents/${AGENT}/identity.md (who you are)
- agents/${AGENT}/beliefs.md (what you believe)
- agents/${AGENT}/reasoning.md (how you think)
- domains/${DOMAIN}/_map.md (your domain's current claims)

### Step 2: Review Recent Tweets (10 min)
Read ${TWEET_FILE} — these are recent tweets from accounts in your domain.
Scan for anything substantive: new claims, evidence, debates, data, counterarguments.

### Step 3: Pick ONE Research Direction (5 min)
Based on your beliefs, your domain's gaps, and what you found in the tweets,
pick ONE research direction that would most advance your domain understanding.

Consider:
- Gaps in your beliefs that need evidence
- Claims in the KB that new evidence might challenge
- Cross-domain connections flagged by other agents
- New developments that change the landscape

Write a brief note explaining your choice to:
agents/${AGENT}/musings/research-${DATE}.md

### Step 4: Archive Sources (60 min)
For each relevant tweet/thread, create an archive file:

Path: inbox/archive/YYYY-MM-DD-{author-handle}-{brief-slug}.md

Use this frontmatter:
---
type: source
title: \"Descriptive title\"
author: \"Display Name (@handle)\"
url: https://original-url
date: YYYY-MM-DD
domain: ${DOMAIN}
secondary_domains: []
format: tweet | thread
status: unprocessed
priority: high | medium | low
tags: [topic1, topic2]
---

## Content
[Full text of tweet/thread]

## Agent Notes
**Why this matters:** [1-2 sentences]
**KB connections:** [Which existing claims relate?]
**Extraction hints:** [What claims might an extractor pull?]
**Context:** [Who is the author, what debate is this part of?]

### Rules:
- Archive EVERYTHING substantive, not just what supports your views
- Set all sources to status: unprocessed (a DIFFERENT instance will extract)
- Flag cross-domain sources with flagged_for_{agent}: [\"reason\"]
- Do NOT extract claims yourself — write good notes so the extractor can
- Check inbox/archive/ for duplicates before creating new archives
- Aim for 5-15 source archives per session

### Step 5: Commit and Push (5 min)
Stage your archives and musing, commit with:

${AGENT}: research session ${DATE} — {brief description of direction}

Pentagon-Agent: $(echo ${AGENT} | sed 's/./\U&/')

Then stop. Do not open a PR — the script handles that."

# --- Run Claude research session ---
log "Starting Claude research session..."
# 5400s timeout = the ~90 minutes of compute promised in the prompt.
timeout 5400 "$CLAUDE_BIN" -p "$RESEARCH_PROMPT" \
  --allowedTools 'Read,Write,Edit,Glob,Grep,Bash' \
  --model sonnet \
  --permission-mode bypassPermissions \
  >> "$LOG" 2>&1 || {
  log "WARN: Research session failed or timed out for $AGENT"
  git checkout main >> "$LOG" 2>&1
  exit 1
}
log "Claude session complete"

# --- Check for changes ---
CHANGED_FILES=$(git status --porcelain)
if [ -z "$CHANGED_FILES" ]; then
  log "No sources archived by $AGENT"
  git checkout main >> "$LOG" 2>&1
  exit 0
fi

# --- Stage and commit if Claude didn't already ---
if ! git log --oneline -1 | grep -q "research session"; then
  # Claude didn't commit — do it manually
  git add inbox/archive/ "agents/${AGENT}/musings/" 2>/dev/null || true
  if git diff --cached --quiet; then
    log "No valid changes to commit"
    git checkout main >> "$LOG" 2>&1
    exit 0
  fi
  # Capitalize the first letter for the commit trailer (GNU sed \U).
  AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')
  # BUGFIX: grep -c already prints "0" when nothing matches (it just exits
  # non-zero). The previous '|| echo "0"' appended a SECOND "0", making
  # SOURCE_COUNT the two-line string "0\n0" and corrupting the message.
  SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || true)
  git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived

Pentagon-Agent: ${AGENT_UPPER}
" >> "$LOG" 2>&1
fi

# --- Push ---
git push -u origin "$BRANCH" --force >> "$LOG" 2>&1
log "Pushed $BRANCH"

# --- Open PR ---
# Build the JSON payload with python so quoting/escaping stays correct.
PR_JSON=$(python3 -c "
import json
data = {
    'title': '${AGENT}: research session ${DATE}',
    'body': '## Self-Directed Research\\n\\nAutomated research session for ${AGENT} (${DOMAIN}).\\n\\nSources archived with status: unprocessed — extract cron will handle claim extraction separately.\\n\\nResearcher and extractor are different Claude instances to prevent motivated reasoning.',
    'base': 'main',
    'head': '${BRANCH}'
}
print(json.dumps(data))
")
PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
  -H "Authorization: token $AGENT_TOKEN" \
  -H "Content-Type: application/json" \
  -d "$PR_JSON" 2>&1)
PR_NUMBER=$(echo "$PR_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number','unknown'))" 2>/dev/null || echo "unknown")
log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"

# --- Back to main ---
git checkout main >> "$LOG" 2>&1
log "=== Research session complete for $AGENT ==="