Merge pull request 'leo: self-directed research architecture + Clay network' (#110) from leo/test-sources into main
This commit is contained in:
commit
736c06bb80
5 changed files with 598 additions and 0 deletions
19
agents/clay/network.json
Normal file
19
agents/clay/network.json
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
{
|
||||||
|
"agent": "clay",
|
||||||
|
"domain": "entertainment",
|
||||||
|
"accounts": [
|
||||||
|
{"username": "ballmatthew", "tier": "core", "why": "Definitive entertainment industry analyst — streaming economics, Metaverse thesis, creator economy frameworks."},
|
||||||
|
{"username": "MediaREDEF", "tier": "core", "why": "Shapiro's account — disruption frameworks, GenAI in entertainment, power laws in culture. Our heaviest single source (13 archived)."},
|
||||||
|
{"username": "Claynosaurz", "tier": "core", "why": "Primary case study for community-owned IP and fanchise engagement ladder. Mediawan deal is our strongest empirical anchor."},
|
||||||
|
{"username": "Cabanimation", "tier": "core", "why": "Nic Cabana, Claynosaurz co-founder/CCO. Annie-nominated animator. Inside perspective on community-to-IP pipeline."},
|
||||||
|
{"username": "jervibore", "tier": "core", "why": "Claynosaurz co-founder. Creative direction and worldbuilding."},
|
||||||
|
{"username": "AndrewsaurP", "tier": "core", "why": "Andrew Pelekis, Claynosaurz CEO. Business strategy, partnerships, franchise scaling."},
|
||||||
|
{"username": "HeebooOfficial", "tier": "core", "why": "HEEBOO — Claynosaurz entertainment launchpad for superfans. Tests IP-as-platform and co-ownership thesis."},
|
||||||
|
{"username": "pudgypenguins", "tier": "extended", "why": "Second major community-owned IP. Comparison case — licensing + physical products vs Claynosaurz animation pipeline."},
|
||||||
|
{"username": "runwayml", "tier": "extended", "why": "Leading GenAI video tool. Releases track AI-collapsed production costs."},
|
||||||
|
{"username": "pika_labs", "tier": "extended", "why": "GenAI video competitor to Runway. Track for production cost convergence evidence."},
|
||||||
|
{"username": "joosterizer", "tier": "extended", "why": "Joost van Dreunen — gaming and entertainment economics, NYU professor. Academic rigor on creator economy."},
|
||||||
|
{"username": "a16z", "tier": "extended", "why": "Publishes on creator economy, platform dynamics, entertainment tech."},
|
||||||
|
{"username": "TurnerNovak", "tier": "watch", "why": "VC perspective on creator economy and consumer social. Signal on capital flows in entertainment tech."}
|
||||||
|
]
|
||||||
|
}
|
||||||
21
agents/rio/network.json
Normal file
21
agents/rio/network.json
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"agent": "rio",
|
||||||
|
"domain": "internet-finance",
|
||||||
|
"accounts": [
|
||||||
|
{"username": "metaproph3t", "tier": "core", "why": "MetaDAO founder, primary futarchy source."},
|
||||||
|
{"username": "MetaDAOProject", "tier": "core", "why": "Official MetaDAO account."},
|
||||||
|
{"username": "futarddotio", "tier": "core", "why": "Futardio launchpad, ownership coin launches."},
|
||||||
|
{"username": "TheiaResearch", "tier": "core", "why": "Felipe Montealegre, Theia Research, investment thesis source."},
|
||||||
|
{"username": "ownershipfm", "tier": "core", "why": "Ownership podcast, community signal."},
|
||||||
|
{"username": "PineAnalytics", "tier": "core", "why": "MetaDAO ecosystem analytics."},
|
||||||
|
{"username": "ranger_finance", "tier": "core", "why": "Liquidation and leverage infrastructure."},
|
||||||
|
{"username": "FlashTrade", "tier": "extended", "why": "Perps on Solana."},
|
||||||
|
{"username": "turbine_cash", "tier": "extended", "why": "DeFi infrastructure."},
|
||||||
|
{"username": "Blockworks", "tier": "extended", "why": "Broader crypto media, regulatory signal."},
|
||||||
|
{"username": "SolanaFloor", "tier": "extended", "why": "Solana ecosystem data."},
|
||||||
|
{"username": "01Resolved", "tier": "extended", "why": "Solana DeFi."},
|
||||||
|
{"username": "_spiz_", "tier": "extended", "why": "Solana DeFi commentary."},
|
||||||
|
{"username": "kru_tweets", "tier": "extended", "why": "Crypto market structure."},
|
||||||
|
{"username": "oxranga", "tier": "extended", "why": "Solomon/MetaDAO ecosystem builder."}
|
||||||
|
]
|
||||||
|
}
|
||||||
21
agents/theseus/network.json
Normal file
21
agents/theseus/network.json
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"agent": "theseus",
|
||||||
|
"domain": "ai-alignment",
|
||||||
|
"accounts": [
|
||||||
|
{"username": "karpathy", "tier": "core", "why": "Autoresearch, agent architecture, delegation patterns."},
|
||||||
|
{"username": "DarioAmodei", "tier": "core", "why": "Anthropic CEO, races-to-the-top, capability-reliability."},
|
||||||
|
{"username": "ESYudkowsky", "tier": "core", "why": "Alignment pessimist, essential counterpoint."},
|
||||||
|
{"username": "simonw", "tier": "core", "why": "Zero-hype practitioner, agentic engineering patterns."},
|
||||||
|
{"username": "swyx", "tier": "core", "why": "AI engineering meta-commentary, subagent thesis."},
|
||||||
|
{"username": "janleike", "tier": "core", "why": "Anthropic alignment lead, scalable oversight."},
|
||||||
|
{"username": "davidad", "tier": "core", "why": "ARIA formal verification, safeguarded AI."},
|
||||||
|
{"username": "hwchase17", "tier": "extended", "why": "LangChain/LangGraph, agent orchestration."},
|
||||||
|
{"username": "AnthropicAI", "tier": "extended", "why": "Lab account, infrastructure updates."},
|
||||||
|
{"username": "NPCollapse", "tier": "extended", "why": "Connor Leahy, AI governance."},
|
||||||
|
{"username": "alexalbert__", "tier": "extended", "why": "Claude Code product lead."},
|
||||||
|
{"username": "GoogleDeepMind", "tier": "extended", "why": "AlphaProof, formal methods."},
|
||||||
|
{"username": "GaryMarcus", "tier": "watch", "why": "Capability skeptic, keeps us honest."},
|
||||||
|
{"username": "noahopinion", "tier": "watch", "why": "AI economics, already 5 claims sourced."},
|
||||||
|
{"username": "ylecun", "tier": "watch", "why": "Meta AI, contrarian on doom."}
|
||||||
|
]
|
||||||
|
}
|
||||||
368
ops/research-session.sh
Normal file
368
ops/research-session.sh
Normal file
|
|
@ -0,0 +1,368 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Run a self-directed research session for one agent.
|
||||||
|
# Usage: ./research-session.sh <agent-name>
|
||||||
|
# Example: ./research-session.sh clay
|
||||||
|
#
|
||||||
|
# What it does:
|
||||||
|
# 1. Pulls latest tweets from the agent's network accounts (X API)
|
||||||
|
# 2. Gives Claude the agent's identity, beliefs, and current KB state
|
||||||
|
# 3. Agent picks a research direction and archives sources with notes
|
||||||
|
# 4. Commits source archives to a branch, pushes, opens PR
|
||||||
|
# 5. Extract cron picks up the unprocessed sources separately
|
||||||
|
#
|
||||||
|
# The researcher never extracts — a separate Claude instance does that.
|
||||||
|
# This prevents motivated reasoning in extraction.
|
||||||
|
|
||||||
|
set -euo pipefail

# --- Configuration ---
# The agent name is interpolated into secret, lock, log and workspace paths
# below, so reject anything that is not a plain identifier before using it.
AGENT="${1:?Usage: $0 <agent-name>}"
if [[ ! "$AGENT" =~ ^[a-z][a-z0-9_-]*$ ]]; then
  echo "ERROR: invalid agent name '$AGENT'" >&2
  exit 1
fi

REPO_DIR="/opt/teleo-eval/workspaces/research-${AGENT}"
FORGEJO_URL="http://localhost:3000"
FORGEJO_ADMIN_TOKEN=$(cat /opt/teleo-eval/secrets/forgejo-admin-token)
# Per-agent token is optional; fall back to the admin token when it is absent.
AGENT_TOKEN=$(cat "/opt/teleo-eval/secrets/forgejo-${AGENT}-token" 2>/dev/null || echo "$FORGEJO_ADMIN_TOKEN")
TWITTER_API_KEY=$(cat /opt/teleo-eval/secrets/twitterapi-io-key)
CLAUDE_BIN="/home/teleo/.local/bin/claude"
LOG_DIR="/opt/teleo-eval/logs"
LOG="$LOG_DIR/research-${AGENT}.log"
LOCKFILE="/tmp/research-${AGENT}.lock"
DATE=$(date +%Y-%m-%d)
BRANCH="${AGENT}/research-${DATE}"
RAW_DIR="/opt/teleo-eval/research-raw/${AGENT}"

# log() appends to $LOG, so the directory must exist before the first call;
# otherwise the redirect fails and `set -e` aborts the script on a fresh host.
mkdir -p "$LOG_DIR"

# Append a timestamped line to this agent's research log.
log() { echo "[$(date -Iseconds)] $*" >> "$LOG"; }
|
||||||
|
|
||||||
|
# --- Lock (prevent concurrent sessions for same agent) ---
TWEET_FILE="/tmp/research-tweets-${AGENT}.md"

# Create the lockfile atomically: with noclobber the redirect fails if the file
# already exists, so two sessions starting together cannot both acquire it.
acquire_lock() {
  ( set -o noclobber; echo $$ > "$LOCKFILE" ) 2>/dev/null
}

if ! acquire_lock; then
  # The file may be empty or may vanish between the check and the read;
  # neither case should abort the script under `set -e`.
  pid=$(cat "$LOCKFILE" 2>/dev/null || true)
  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    log "SKIP: research session already running for $AGENT (pid $pid)"
    exit 0
  fi
  log "WARN: stale lockfile for $AGENT, removing"
  rm -f "$LOCKFILE"
  if ! acquire_lock; then
    # Another session grabbed the lock while the stale one was being cleared.
    log "SKIP: research session already running for $AGENT (lost lock race)"
    exit 0
  fi
fi

# Installed only once the lock is owned, so a skipped run never deletes the
# lockfile of the session that is actually running.
trap 'rm -f "$LOCKFILE" "$TWEET_FILE"' EXIT
|
||||||
|
|
||||||
|
log "=== Starting research session for $AGENT ==="

# --- Ensure directories ---
mkdir -p "$RAW_DIR" "$LOG_DIR"

# --- Clone or update repo ---
if [ ! -d "$REPO_DIR/.git" ]; then
  log "Cloning repo for $AGENT research..."
  git -c http.extraHeader="Authorization: token $FORGEJO_ADMIN_TOKEN" \
    clone "${FORGEJO_URL}/teleo/teleo-codex.git" "$REPO_DIR" >> "$LOG" 2>&1
fi

cd "$REPO_DIR"

# NOTE(review): this persists the admin token in plaintext in .git/config of a
# workspace the research agent can read. Consider a per-command
# `-c http.extraHeader=...` or the per-agent token instead.
git config credential.helper "!f() { echo username=m3taversal; echo password=$FORGEJO_ADMIN_TOKEN; }; f"
git remote set-url origin "${FORGEJO_URL}/teleo/teleo-codex.git" 2>/dev/null || true

# A session that failed or timed out leaves its edits in the working tree.
# Modified tracked files make `git pull --rebase` refuse to run (aborting every
# later session under `set -e`), and stale untracked files would be swept into
# today's commit. Park them in a stash so nothing is lost and the tree is clean.
if [ -n "$(git status --porcelain)" ]; then
  log "WARN: workspace has leftovers from an earlier session, stashing them"
  git stash push --include-untracked -m "leftovers found before research ${DATE}" >> "$LOG" 2>&1 \
    || log "WARN: could not stash leftovers; continuing with a dirty tree"
fi

git checkout main >> "$LOG" 2>&1
git pull --rebase >> "$LOG" 2>&1
|
||||||
|
|
||||||
|
# --- Map agent to domain ---
# Print the knowledge-base domain owned by the given agent.
# Returns non-zero (and prints nothing) for an agent with no known domain.
domain_for_agent() {
  case "$1" in
    rio)     echo "internet-finance" ;;
    clay)    echo "entertainment" ;;
    theseus) echo "ai-alignment" ;;
    vida)    echo "health" ;;
    astra)   echo "space-development" ;;
    leo)     echo "grand-strategy" ;;
    *)       return 1 ;;
  esac
}

if ! DOMAIN=$(domain_for_agent "$AGENT"); then
  log "ERROR: Unknown agent $AGENT"
  exit 1
fi
|
||||||
|
|
||||||
|
# --- Pull tweets from agent's network ---
# Check if agent has a network file in the repo
NETWORK_FILE="agents/${AGENT}/network.json"

# Print the usernames of all 'core' and 'extended' accounts in a network file.
# The path is passed as an argument rather than spliced into the Python source.
list_network_accounts() {
  python3 - "$1" <<'PY'
import json, sys

with open(sys.argv[1]) as f:
    data = json.load(f)
for acct in data.get('accounts', []):
    if acct.get('tier') in ('core', 'extended'):
        print(acct['username'])
PY
}

# Render up to 20 tweets from a cached API response file as plain text.
# Exits non-zero when the file cannot be parsed so callers can substitute a
# placeholder instead of silently emitting nothing.
format_tweets() {
  python3 - "$1" <<'PY'
import json, sys

try:
    with open(sys.argv[1]) as fh:
        d = json.load(fh)
    tweets = d.get('tweets', d.get('data', []))
    # Defensive: tolerate responses that nest the list as data.tweets.
    # NOTE(review): confirm the actual response envelope against the API docs.
    if isinstance(tweets, dict):
        tweets = tweets.get('tweets', [])
    for t in tweets[:20]:
        text = t.get('text', '')[:500]
        likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
        date = t.get('createdAt', t.get('created_at', 'unknown'))
        url = t.get('twitterUrl', t.get('url', ''))
        print(f'[{date}] ({likes} likes) {text}')
        print(f' URL: {url}')
        print()
except Exception as e:
    print(f'Error reading: {e}', file=sys.stderr)
    sys.exit(1)
PY
}

if [ ! -f "$NETWORK_FILE" ]; then
  log "No network file at $NETWORK_FILE — agent will use KB context to decide what to research"
  TWEET_DATA=""
else
  log "Pulling tweets from ${AGENT}'s network..."
  ACCOUNTS=$(list_network_accounts "$NETWORK_FILE" 2>> "$LOG") || {
    # A malformed network file should be visible in the log, not silent.
    log "WARN: could not read accounts from $NETWORK_FILE — continuing without tweets"
    ACCOUNTS=""
  }

  TWEET_DATA=""
  API_CALLS=0
  API_CACHED=0
  for USERNAME in $ACCOUNTS; do
    # Validate username (Twitter handles are alphanumeric + underscore only)
    if [[ ! "$USERNAME" =~ ^[a-zA-Z0-9_]+$ ]]; then
      log "WARN: Invalid username '$USERNAME' in network file, skipping"
      continue
    fi

    OUTFILE="$RAW_DIR/${USERNAME}.json"
    # Only pull if file doesn't exist or is older than 12 hours
    if [ ! -f "$OUTFILE" ] || [ -n "$(find "$OUTFILE" -mmin +720 2>/dev/null)" ]; then
      log "Pulling @${USERNAME}..."
      # Download to a temp file and move it into place only on success.
      # --fail turns HTTP errors into a non-zero exit, so an error body or a
      # truncated download is never cached as if it were tweet data.
      TMPFILE="${OUTFILE}.tmp"
      if curl -sf --max-time 30 \
          "https://api.twitterapi.io/twitter/user/last_tweets?userName=${USERNAME}" \
          -H "X-API-Key: ${TWITTER_API_KEY}" \
          -o "$TMPFILE" 2>/dev/null; then
        mv "$TMPFILE" "$OUTFILE"
      else
        rm -f "$TMPFILE"
        log "WARN: Failed to pull @${USERNAME}"
        continue
      fi
      API_CALLS=$((API_CALLS + 1))
      sleep 2 # Rate limit courtesy
    else
      API_CACHED=$((API_CACHED + 1))
    fi

    if [ -f "$OUTFILE" ]; then
      TWEET_DATA="${TWEET_DATA}
--- @${USERNAME} tweets ---
$(format_tweets "$OUTFILE" 2>> "$LOG" || echo "(failed to parse)")"
    fi
  done

  log "API usage: ${API_CALLS} calls, ${API_CACHED} cached for ${AGENT}"

  # Append to cumulative usage log (create with header if new)
  USAGE_CSV="/opt/teleo-eval/logs/x-api-usage.csv"
  if [ ! -f "$USAGE_CSV" ]; then
    echo "date,agent,api_calls,cached,accounts_total" > "$USAGE_CSV"
  fi
  ACCOUNT_COUNT=$(echo "$ACCOUNTS" | wc -w | tr -d ' ')
  echo "${DATE},${AGENT},${API_CALLS},${API_CACHED},${ACCOUNT_COUNT}" >> "$USAGE_CSV"
fi
|
||||||
|
|
||||||
|
# --- Also check for any raw JSON dumps in inbox-raw ---
INBOX_RAW="/opt/teleo-eval/inbox-raw/${AGENT}"

# Render up to 20 tweets from a raw dump file as plain text.
# The path is passed as an argument: dump filenames are not validated, so
# splicing them into Python source would break (or inject code) on a quote.
format_raw_dump() {
  python3 - "$1" <<'PY'
import json, sys

try:
    with open(sys.argv[1]) as fh:
        d = json.load(fh)
    tweets = d.get('tweets', d.get('data', []))
    for t in tweets[:20]:
        text = t.get('text', '')[:500]
        likes = t.get('likeCount', t.get('public_metrics', {}).get('like_count', 0))
        date = t.get('createdAt', t.get('created_at', 'unknown'))
        url = t.get('twitterUrl', t.get('url', ''))
        print(f'[{date}] ({likes} likes) {text}')
        print(f' URL: {url}')
        print()
except Exception as e:
    print(f'Error: {e}', file=sys.stderr)
    # Non-zero exit so the caller's "(failed to parse)" fallback actually fires.
    sys.exit(1)
PY
}

if [ -d "$INBOX_RAW" ]; then
  # nullglob makes an unmatched pattern expand to nothing instead of itself.
  shopt -s nullglob
  RAW_DUMPS=("$INBOX_RAW"/*.json)
  shopt -u nullglob

  if [ "${#RAW_DUMPS[@]}" -gt 0 ]; then
    log "Found raw dumps in $INBOX_RAW"
    for RAWFILE in "${RAW_DUMPS[@]}"; do
      USERNAME=$(basename "$RAWFILE" .json)
      TWEET_DATA="${TWEET_DATA}
--- @${USERNAME} tweets (from raw dump) ---
$(format_raw_dump "$RAWFILE" 2>> "$LOG" || echo "(failed to parse)")"
    done
  fi
fi
|
||||||
|
|
||||||
|
# --- Create branch ---
# Start today's branch from scratch: drop any local copy left by an earlier
# run on the same date (ignoring the error when there is none), then recreate.
if ! git branch -D "$BRANCH" 2>/dev/null; then
  :
fi
git checkout -b "$BRANCH" >> "$LOG" 2>&1
log "On branch $BRANCH"

# --- Build the research prompt ---
# The collected tweets go into a temp file that the prompt points Claude at.
echo "$TWEET_DATA" > "$TWEET_FILE"
|
||||||
|
|
||||||
|
RESEARCH_PROMPT="You are ${AGENT}, a Teleo knowledge base agent. Domain: ${DOMAIN}.
|
||||||
|
|
||||||
|
## Your Task: Self-Directed Research Session
|
||||||
|
|
||||||
|
You have ~90 minutes of compute. Use it wisely.
|
||||||
|
|
||||||
|
### Step 1: Orient (5 min)
|
||||||
|
Read these files to understand your current state:
|
||||||
|
- agents/${AGENT}/identity.md (who you are)
|
||||||
|
- agents/${AGENT}/beliefs.md (what you believe)
|
||||||
|
- agents/${AGENT}/reasoning.md (how you think)
|
||||||
|
- domains/${DOMAIN}/_map.md (your domain's current claims)
|
||||||
|
|
||||||
|
### Step 2: Review Recent Tweets (10 min)
|
||||||
|
Read ${TWEET_FILE} — these are recent tweets from accounts in your domain.
|
||||||
|
Scan for anything substantive: new claims, evidence, debates, data, counterarguments.
|
||||||
|
|
||||||
|
### Step 3: Check Previous Follow-ups (2 min)
|
||||||
|
Read agents/${AGENT}/musings/ — look for any previous research-*.md files. If they exist, check the 'Follow-up Directions' section at the bottom. These are threads your past self flagged but didn't have time to cover. Give them priority when picking your direction.
|
||||||
|
|
||||||
|
### Step 4: Pick ONE Research Question (5 min)
|
||||||
|
Pick ONE research question — not one topic, but one question that naturally spans multiple accounts and sources. 'How is capital flowing through Solana launchpads?' is one question even though it touches MetaDAO, SOAR, Futardio.
|
||||||
|
|
||||||
|
**Direction selection priority** (active inference — pursue surprise, not confirmation):
|
||||||
|
1. Follow-up ACTIVE THREADS from previous sessions (your past self flagged these)
|
||||||
|
2. Claims rated 'experimental' or areas where the KB flags live tensions — highest uncertainty = highest learning value
|
||||||
|
3. Evidence that CHALLENGES your beliefs, not confirms them
|
||||||
|
4. Cross-domain connections flagged by other agents
|
||||||
|
5. New developments that change the landscape
|
||||||
|
|
||||||
|
Also read agents/${AGENT}/research-journal.md if it exists — this is your cross-session pattern tracker.
|
||||||
|
|
||||||
|
Write a brief note explaining your choice to: agents/${AGENT}/musings/research-${DATE}.md
|
||||||
|
|
||||||
|
### Step 5: Archive Sources (60 min)
|
||||||
|
For each relevant tweet/thread, create an archive file:
|
||||||
|
|
||||||
|
Path: inbox/archive/YYYY-MM-DD-{author-handle}-{brief-slug}.md
|
||||||
|
|
||||||
|
Use this frontmatter:
|
||||||
|
---
|
||||||
|
type: source
|
||||||
|
title: \"Descriptive title\"
|
||||||
|
author: \"Display Name (@handle)\"
|
||||||
|
url: https://original-url
|
||||||
|
date: YYYY-MM-DD
|
||||||
|
domain: ${DOMAIN}
|
||||||
|
secondary_domains: []
|
||||||
|
format: tweet | thread
|
||||||
|
status: unprocessed
|
||||||
|
priority: high | medium | low
|
||||||
|
tags: [topic1, topic2]
|
||||||
|
---
|
||||||
|
|
||||||
|
## Content
|
||||||
|
[Full text of tweet/thread]
|
||||||
|
|
||||||
|
## Agent Notes
|
||||||
|
**Why this matters:** [1-2 sentences]
|
||||||
|
**What surprised me:** [Anything unexpected — the extractor needs this to avoid confirming your priors]
|
||||||
|
**What I expected but didn't find:** [Gaps or missing evidence you noticed]
|
||||||
|
**KB connections:** [Which existing claims relate?]
|
||||||
|
**Extraction hints:** [What claims might an extractor pull?]
|
||||||
|
**Context:** [Who is the author, what debate is this part of?]
|
||||||
|
|
||||||
|
## Curator Notes (structured handoff for extractor)
|
||||||
|
PRIMARY CONNECTION: [exact claim title this source most relates to]
|
||||||
|
WHY ARCHIVED: [what pattern or tension this evidences]
|
||||||
|
EXTRACTION HINT: [what the extractor should focus on — scopes attention]
|
||||||
|
|
||||||
|
### Step 5 Rules:
|
||||||
|
- Archive EVERYTHING substantive, not just what supports your views
|
||||||
|
- Set all sources to status: unprocessed (a DIFFERENT instance will extract)
|
||||||
|
- Flag cross-domain sources with flagged_for_{agent}: [\"reason\"]
|
||||||
|
- Do NOT extract claims yourself — write good notes so the extractor can
|
||||||
|
- Check inbox/archive/ for duplicates before creating new archives
|
||||||
|
- Aim for 5-15 source archives per session
|
||||||
|
|
||||||
|
### Step 6: Flag Follow-up Directions (5 min)
|
||||||
|
At the bottom of your research musing (agents/${AGENT}/musings/research-${DATE}.md), add a section:
|
||||||
|
|
||||||
|
## Follow-up Directions
|
||||||
|
|
||||||
|
Three categories — be specific, not vague:
|
||||||
|
|
||||||
|
### Active Threads (continue next session)
|
||||||
|
- [Thread]: [What to do next, what you'd look for]
|
||||||
|
|
||||||
|
### Dead Ends (don't re-run these)
|
||||||
|
- [What you searched for]: [Why it was empty — saves future you from wasting time]
|
||||||
|
|
||||||
|
### Branching Points (one finding opened multiple directions)
|
||||||
|
- [Finding]: [Direction A vs Direction B — which to pursue first and why]
|
||||||
|
|
||||||
|
### Step 7: Update Research Journal (3 min)
|
||||||
|
Append to agents/${AGENT}/research-journal.md (create if it doesn't exist). This is your cross-session memory — NOT the same as the musing.
|
||||||
|
|
||||||
|
Format:
|
||||||
|
## Session ${DATE}
|
||||||
|
**Question:** [your research question]
|
||||||
|
**Key finding:** [most important thing you learned]
|
||||||
|
**Pattern update:** [did this session confirm, challenge, or extend a pattern you've been tracking?]
|
||||||
|
**Confidence shift:** [did any of your beliefs get stronger or weaker?]
|
||||||
|
|
||||||
|
The journal accumulates session over session. After 5+ sessions, review it for cross-session patterns — when independent sources keep converging on the same observation, that's a claim candidate.
|
||||||
|
|
||||||
|
### Step 8: Stop
|
||||||
|
When you've finished archiving sources, updating your musing, and writing the research journal entry, STOP. Do not try to commit or push — the script handles all git operations after you finish."
|
||||||
|
|
||||||
|
# --- Run Claude research session ---
# NOTE(review): the session runs with bypassPermissions and an unrestricted
# Read tool while its prompt points at untrusted tweet text. Nothing here stops
# it from reading files outside the repo (e.g. the secrets directory); consider
# sandboxing or a dedicated low-privilege user.
log "Starting Claude research session..."

session_rc=0
timeout 5400 "$CLAUDE_BIN" -p "$RESEARCH_PROMPT" \
  --allowedTools 'Read,Write,Edit,Glob,Grep' \
  --model sonnet \
  --permission-mode bypassPermissions \
  >> "$LOG" 2>&1 || session_rc=$?

if [ "$session_rc" -ne 0 ]; then
  # GNU timeout exits with 124 when it had to kill the command.
  if [ "$session_rc" -eq 124 ]; then
    log "WARN: Research session timed out for $AGENT (exit 124)"
  else
    log "WARN: Research session failed for $AGENT (exit $session_rc)"
  fi
  # Park partial output in a stash instead of carrying it onto main, where it
  # would block the next `git pull --rebase` and leak into the next commit.
  git stash push --include-untracked -m "partial output of failed research ${DATE}" >> "$LOG" 2>&1 \
    || log "WARN: could not stash partial output"
  git checkout main >> "$LOG" 2>&1
  exit 1
fi

log "Claude session complete"
|
||||||
|
|
||||||
|
# --- Check for changes ---
CHANGED_FILES=$(git status --porcelain)
if [ -z "$CHANGED_FILES" ]; then
  log "No sources archived by $AGENT"
  git checkout main >> "$LOG" 2>&1
  exit 0
fi

# --- Stage and commit ---
# Stage each allowed path on its own. `git add a b c` aborts without staging
# anything when any single pathspec matches no files, so one missing path
# (e.g. no research journal yet) would silently discard the whole session.
for STAGE_PATH in inbox/archive/ "agents/${AGENT}/musings/" "agents/${AGENT}/research-journal.md"; do
  git add -- "$STAGE_PATH" >> "$LOG" 2>&1 \
    || log "WARN: could not stage $STAGE_PATH (path missing?)"
done

if git diff --cached --quiet; then
  log "No valid changes to commit"
  git checkout main >> "$LOG" 2>&1
  exit 0
fi

# Capitalise the first letter of the agent name (GNU sed \U extension).
AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')

# grep -c already prints "0" when nothing matches but exits 1; `|| true` only
# neutralises that exit status. (`|| echo "0"` would produce "0" twice.)
SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || true)

git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived

Pentagon-Agent: ${AGENT_UPPER} <HEADLESS>" >> "$LOG" 2>&1
|
||||||
|
|
||||||
|
# --- Push ---
# Force-push: the branch is rebuilt from main on every run for the same date.
git push -u origin "$BRANCH" --force >> "$LOG" 2>&1
log "Pushed $BRANCH"

# --- Check for existing PR on this branch ---
# The branch name is passed to jq as data (--arg) rather than spliced into the
# filter. A failed lookup leaves EXISTING_PR empty instead of aborting the
# script, so the branch that was just pushed still gets a PR attempt.
# NOTE(review): only the first page of open PRs is inspected.
EXISTING_PR=$(curl -s "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls?state=open" \
  -H "Authorization: token $AGENT_TOKEN" \
  | jq -r --arg branch "$BRANCH" '.[] | select(.head.ref == $branch) | .number' 2>/dev/null || true)

if [ -n "$EXISTING_PR" ]; then
  log "PR already exists for $BRANCH (#$EXISTING_PR), skipping creation"
else
  # --- Open PR ---
  PR_JSON=$(jq -n \
    --arg title "${AGENT}: research session ${DATE}" \
    --arg body "## Self-Directed Research

Automated research session for ${AGENT} (${DOMAIN}).

Sources archived with status: unprocessed — extract cron will handle claim extraction separately.

Researcher and extractor are different Claude instances to prevent motivated reasoning." \
    --arg base "main" \
    --arg head "$BRANCH" \
    '{title: $title, body: $body, base: $base, head: $head}')

  PR_RESULT=$(curl -sS -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
    -H "Authorization: token $AGENT_TOKEN" \
    -H "Content-Type: application/json" \
    -d "$PR_JSON" 2>&1 || true)

  # Only report success when the API actually returned a PR number; otherwise
  # log the response so the failure is diagnosable.
  PR_NUMBER=$(echo "$PR_RESULT" | jq -r '.number // empty' 2>/dev/null || true)
  if [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
    log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"
  else
    log "ERROR: PR creation failed for $BRANCH: ${PR_RESULT}"
    git checkout main >> "$LOG" 2>&1
    exit 1
  fi
fi

# --- Back to main ---
git checkout main >> "$LOG" 2>&1
log "=== Research session complete for $AGENT ==="
|
||||||
169
ops/self-directed-research.md
Normal file
169
ops/self-directed-research.md
Normal file
|
|
@ -0,0 +1,169 @@
|
||||||
|
# Self-Directed Research Architecture
|
||||||
|
|
||||||
|
Draft — Leo, 2026-03-10
|
||||||
|
|
||||||
|
## Core Idea
|
||||||
|
|
||||||
|
Each agent gets a daily research session on the VPS. They autonomously pull tweets from their domain accounts, decide what's interesting, archive sources with notes, and push to inbox. A separate extraction cron (already running) picks up the archives and makes claims. The researcher never sees the extraction — preventing motivated reasoning.
|
||||||
|
|
||||||
|
## Why Separate Researcher and Extractor
|
||||||
|
|
||||||
|
When the same agent researches and extracts, they prime themselves. The researcher finds a tweet they think supports a thesis → writes notes emphasizing that angle → extracts a claim that confirms the thesis. The extraction becomes a formality.
|
||||||
|
|
||||||
|
Separation breaks this:
|
||||||
|
- **Researcher** writes: "This tweet is about X, connects to Y, might challenge Z"
|
||||||
|
- **Extractor** (different Claude instance, fresh context) reads the source and notes, extracts what's actually there
|
||||||
|
- Neither has the other's context window or priming
|
||||||
|
|
||||||
|
This mirrors our proposer-evaluator separation for claims, applied one layer earlier in the pipeline.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Three cron stages on VPS
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||||
|
│ Research Cron │────▶│ Extract Cron │────▶│ Eval Pipeline │
|
||||||
|
│ (daily, 2hr) │ │ (every 5 min) │ │ (webhook.py) │
|
||||||
|
│ │ │ │ │ │
|
||||||
|
│ Pull tweets │ │ Read archives │ │ Review claims │
|
||||||
|
│ Pick 1 task │ │ Extract claims │ │ Approve/reject │
|
||||||
|
│ Archive sources │ │ Open PR │ │ Merge │
|
||||||
|
│ Push branch+PR │ │ │ │ │
|
||||||
|
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Research Cron: `research-session.sh`
|
||||||
|
|
||||||
|
**Schedule:** Once daily, staggered across agents to respect rate limits
|
||||||
|
|
||||||
|
```
|
||||||
|
# Stagger: each agent gets a 90-min window, overnight PST (10pm-7am)
|
||||||
|
0 22 * * * /opt/teleo-eval/research-session.sh rio
|
||||||
|
30 23 * * * /opt/teleo-eval/research-session.sh clay
|
||||||
|
0 1 * * * /opt/teleo-eval/research-session.sh theseus
|
||||||
|
30 2 * * * /opt/teleo-eval/research-session.sh vida
|
||||||
|
0 4 * * * /opt/teleo-eval/research-session.sh astra
|
||||||
|
30 5 * * * /opt/teleo-eval/research-session.sh leo
|
||||||
|
```
|
||||||
|
|
||||||
|
**Per agent, the research session (~90 min):**
|
||||||
|
|
||||||
|
1. Pull latest tweets from agent's network accounts (X API)
|
||||||
|
2. Read the agent's beliefs, recent claims, open positions
|
||||||
|
3. Claude prompt: "You are {agent}. Here are your latest tweets from {accounts}. Here is your current knowledge state. Pick ONE research direction that advances your domain understanding. Archive the most relevant sources with notes."
|
||||||
|
4. Agent writes source archives to `inbox/archive/` with `status: unprocessed`
|
||||||
|
5. Commit, push to branch, open PR (source-only, no claims)
|
||||||
|
6. Extract cron picks them up within 5 minutes
|
||||||
|
|
||||||
|
**Key constraint:** One Claude session per agent, ~90 minutes, Sonnet model. Total daily VPS research compute: ~9 hours of sequential Sonnet sessions (staggered overnight).
|
||||||
|
|
||||||
|
### Research Prompt Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
You are {agent}, a Teleo knowledge base agent specializing in {domain}.
|
||||||
|
|
||||||
|
## Your Current State
|
||||||
|
{Read from agents/{agent}/beliefs.md, reasoning.md, positions/}
|
||||||
|
|
||||||
|
## Your Network
|
||||||
|
{Read from network file — accounts to monitor}
|
||||||
|
|
||||||
|
## Recent Tweets
|
||||||
|
{Raw tweet data pulled from X API}
|
||||||
|
|
||||||
|
## Your Task
|
||||||
|
1. Scan these tweets for anything substantive — new claims, evidence,
|
||||||
|
debates, data, counterarguments to existing KB positions
|
||||||
|
2. Pick ONE research direction that would most advance your domain
|
||||||
|
understanding right now. Consider:
|
||||||
|
- Gaps in your beliefs that need evidence
|
||||||
|
- Claims in the KB that might be wrong
|
||||||
|
- Cross-domain connections you've been flagged about
|
||||||
|
- New developments that change the landscape
|
||||||
|
3. Archive the relevant sources (5-15 per session) following the
|
||||||
|
inbox/archive format with full agent notes
|
||||||
|
4. Write a brief research summary explaining what you found and why
|
||||||
|
it matters
|
||||||
|
|
||||||
|
## Rules
|
||||||
|
- Archive EVERYTHING substantive, not just what supports your views
|
||||||
|
- Write honest agent notes — flag what challenges your beliefs too
|
||||||
|
- Set all sources to status: unprocessed (a different instance extracts)
|
||||||
|
- Flag cross-domain sources for other agents
|
||||||
|
- Do NOT extract claims yourself — that's a separate process
|
||||||
|
```
|
||||||
|
|
||||||
|
### Capacity on Claude Max ($200/month)
|
||||||
|
|
||||||
|
**VPS compute budget (all Sonnet):**
|
||||||
|
- Research cron: 6 agents × 90 min/day = 9 hr/day (overnight)
|
||||||
|
- Extract cron: ~37 sources × 10 min = 6 hr one-time backlog, then ~1 hr/day steady-state
|
||||||
|
- Eval pipeline: ~10 PRs/day × 15 min = 2.5 hr/day
|
||||||
|
- **Total VPS:** ~12.5 hr/day Sonnet (steady state: 9 hr research + ~1 hr extraction + 2.5 hr eval)
|
||||||
|
|
||||||
|
**Laptop compute budget (Opus + Sonnet mix):**
|
||||||
|
- Agent sessions: 2-3 concurrent, ~4-6 hr/day
|
||||||
|
- Leo coordination: ~1-2 hr/day
|
||||||
|
|
||||||
|
**Single subscription feasibility:** Tight but workable if:
|
||||||
|
- VPS runs overnight (10pm-7am staggered research + continuous extraction)
|
||||||
|
- Laptop agents run during the day
|
||||||
|
- Never more than 2-3 concurrent sessions total
|
||||||
|
- VPS uses Sonnet exclusively (cheaper rate limits)
|
||||||
|
|
||||||
|
**Risk:** If rate limits tighten or daily message caps exist, the VPS research cron may not complete all 6 agents. Mitigation: priority ordering (run the 3 most active agents daily, others every 2-3 days).
|
||||||
|
|
||||||
|
## Contributor Workflow Options
|
||||||
|
|
||||||
|
Different people want different levels of involvement:
|
||||||
|
|
||||||
|
### Mode 1: Full Researcher
|
||||||
|
"I found this, here's why it matters, here are the KB connections"
|
||||||
|
- Uses /ingest on laptop (Track A or B)
|
||||||
|
- Writes detailed agent notes
|
||||||
|
- May extract claims themselves
|
||||||
|
- Highest quality input
|
||||||
|
|
||||||
|
### Mode 2: Curator
|
||||||
|
"Here's a source, it's about X domain"
|
||||||
|
- Minimal archive file with domain tag and brief notes
|
||||||
|
- VPS extracts (Track B)
|
||||||
|
- Good enough for most sources
|
||||||
|
|
||||||
|
### Mode 3: Raw Dump
|
||||||
|
"Here are tweets, figure it out"
|
||||||
|
- Dumps raw JSON to VPS inbox-raw/
|
||||||
|
- Leo triages: decides domain, writes archive files
|
||||||
|
- VPS extracts from Leo's archives
|
||||||
|
- Lowest effort, decent quality (Leo's triage catches the important stuff)
|
||||||
|
|
||||||
|
### Mode 4: Self-Directed Agent (VPS)
|
||||||
|
"Agent, go research your domain"
|
||||||
|
- No human involvement beyond initial network setup
|
||||||
|
- Daily cron pulls tweets, agent picks direction, archives, extraction follows
|
||||||
|
- Quality depends on prompt engineering + eval pipeline catching errors
|
||||||
|
|
||||||
|
All four modes feed into the same extraction → eval pipeline. Quality varies, but the eval pipeline is the quality gate regardless.
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. **Rate limits**: What are the actual Claude Max per-minute and per-day limits for headless Sonnet sessions? Need empirical data from this first extraction run.
|
||||||
|
|
||||||
|
2. **Research quality**: Will a 90-minute Sonnet session produce good enough research notes? Or does research require Opus-level reasoning?
|
||||||
|
|
||||||
|
3. **Network bootstrapping**: Agents need network files. Who curates the initial account lists? (Currently Cory + Leo, eventually agents propose additions)
|
||||||
|
|
||||||
|
4. **Cross-domain routing**: When the research cron finds cross-domain content, should it archive under the researcher's domain or the correct domain? (Probably correct domain with flagged_for_{researcher})
|
||||||
|
|
||||||
|
5. **Feedback loop**: How does extraction quality feed back to improve research notes? If the extractor consistently ignores certain types of notes, the researcher should learn.
|
||||||
|
|
||||||
|
6. **Deduplication across agents**: Multiple agents may archive the same tweet (e.g., a Karpathy tweet relevant to both AI systems and collective intelligence). The extract cron needs to detect this.
|
||||||
|
|
||||||
|
## Implementation Order
|
||||||
|
|
||||||
|
1. ✅ Extract cron (running now — validating extraction quality)
|
||||||
|
2. **Next**: Research cron — daily self-directed sessions per agent
|
||||||
|
3. **Then**: Raw dump path — Leo triage from JSON → archive
|
||||||
|
4. **Later**: Full end-to-end with X API pull integrated into research cron
|
||||||
|
5. **Eventually**: Feedback loops from eval quality → research prompt tuning
|
||||||
Loading…
Reference in a new issue