Auto: 2 files | 2 files changed, 71 insertions(+), 45 deletions(-)

2026-03-10 12:03:40 +00:00 · 2026-03-10 12:03:40 +00:00 · 1c6aab23bc
commit 1c6aab23bc
parent b1dafa2ca8
2 changed files with 71 additions and 45 deletions
--- a/ops/research-session.sh
+++ b/ops/research-session.sh
@ -42,7 +42,8 @@ if [ -f "$LOCKFILE" ]; then
    rm -f "$LOCKFILE"
 fi
 echo $$ > "$LOCKFILE"
-trap 'rm -f "$LOCKFILE"' EXIT
+TWEET_FILE="/tmp/research-tweets-${AGENT}.md"
+trap 'rm -f "$LOCKFILE" "$TWEET_FILE"' EXIT

 log "=== Starting research session for $AGENT ==="

@ -91,7 +92,14 @@ for acct in data.get('accounts', []):
 " 2>/dev/null || true)

    TWEET_DATA=""
+    API_CALLS=0
+    API_CACHED=0
    for USERNAME in $ACCOUNTS; do
+        # Validate username (Twitter handles are alphanumeric + underscore only)
+        if [[ ! "$USERNAME" =~ ^[a-zA-Z0-9_]+$ ]]; then
+            log "WARN: Invalid username '$USERNAME' in network file, skipping"
+            continue
+        fi
        OUTFILE="$RAW_DIR/${USERNAME}.json"
        # Only pull if file doesn't exist or is older than 12 hours
        if [ ! -f "$OUTFILE" ] || [ $(find "$OUTFILE" -mmin +720 2>/dev/null | wc -l) -gt 0 ]; then
@ -102,7 +110,10 @@ for acct in data.get('accounts', []):
                log "WARN: Failed to pull @${USERNAME}"
                continue
            }
+            API_CALLS=$((API_CALLS + 1))
            sleep 2  # Rate limit courtesy
+        else
+            API_CACHED=$((API_CACHED + 1))
        fi
        if [ -f "$OUTFILE" ]; then
            TWEET_DATA="${TWEET_DATA}
@ -125,6 +136,14 @@ except Exception as e:
 " 2>/dev/null || echo "(failed to parse)")"
        fi
    done
+    log "API usage: ${API_CALLS} calls, ${API_CACHED} cached for ${AGENT}"
+    # Append to cumulative usage log (create with header if new)
+    USAGE_CSV="/opt/teleo-eval/logs/x-api-usage.csv"
+    if [ ! -f "$USAGE_CSV" ]; then
+        echo "date,agent,api_calls,cached,accounts_total" > "$USAGE_CSV"
+    fi
+    ACCOUNT_COUNT=$(echo "$ACCOUNTS" | wc -w | tr -d ' ')
+    echo "${DATE},${AGENT},${API_CALLS},${API_CACHED},${ACCOUNT_COUNT}" >> "$USAGE_CSV"
 fi

 # --- Also check for any raw JSON dumps in inbox-raw ---
@ -161,7 +180,6 @@ log "On branch $BRANCH"

 # --- Build the research prompt ---
 # Write tweet data to a temp file so Claude can read it
-TWEET_FILE="/tmp/research-tweets-${AGENT}.md"
 echo "$TWEET_DATA" > "$TWEET_FILE"

 RESEARCH_PROMPT="You are ${AGENT}, a Teleo knowledge base agent. Domain: ${DOMAIN}.
@ -295,47 +313,55 @@ if [ -z "$CHANGED_FILES" ]; then
    exit 0
 fi

-# --- Stage and commit if Claude didn't already ---
-if ! git log --oneline -1 | grep -q "research session"; then
-    # Claude didn't commit — do it manually
-    git add inbox/archive/ agents/${AGENT}/musings/ agents/${AGENT}/research-journal.md 2>/dev/null || true
+# --- Stage and commit ---
+git add inbox/archive/ agents/${AGENT}/musings/ agents/${AGENT}/research-journal.md 2>/dev/null || true

-    if git diff --cached --quiet; then
-        log "No valid changes to commit"
-        git checkout main >> "$LOG" 2>&1
-        exit 0
-    fi
+if git diff --cached --quiet; then
+    log "No valid changes to commit"
+    git checkout main >> "$LOG" 2>&1
+    exit 0
+fi

-    AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')
-    SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || echo "0")
-    git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived
+AGENT_UPPER=$(echo "$AGENT" | sed 's/./\U&/')
+SOURCE_COUNT=$(git diff --cached --name-only | grep -c "^inbox/archive/" || echo "0")
+git commit -m "${AGENT}: research session ${DATE} — ${SOURCE_COUNT} sources archived

 Pentagon-Agent: ${AGENT_UPPER} <HEADLESS>" >> "$LOG" 2>&1
-fi

 # --- Push ---
 git push -u origin "$BRANCH" --force >> "$LOG" 2>&1
 log "Pushed $BRANCH"

-# --- Open PR ---
-PR_JSON=$(python3 -c "
-import json
-data = {
-    'title': '${AGENT}: research session ${DATE}',
-    'body': '## Self-Directed Research\\n\\nAutomated research session for ${AGENT} (${DOMAIN}).\\n\\nSources archived with status: unprocessed — extract cron will handle claim extraction separately.\\n\\nResearcher and extractor are different Claude instances to prevent motivated reasoning.',
-    'base': 'main',
-    'head': '${BRANCH}'
-}
-print(json.dumps(data))
-")
-
-PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
+# --- Check for existing PR on this branch ---
+EXISTING_PR=$(curl -s "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls?state=open" \
    -H "Authorization: token $AGENT_TOKEN" \
-    -H "Content-Type: application/json" \
-    -d "$PR_JSON" 2>&1)
+    | jq -r ".[] | select(.head.ref == \"$BRANCH\") | .number" 2>/dev/null)

-PR_NUMBER=$(echo "$PR_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('number','unknown'))" 2>/dev/null || echo "unknown")
-log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"
+if [ -n "$EXISTING_PR" ]; then
+    log "PR already exists for $BRANCH (#$EXISTING_PR), skipping creation"
+else
+    # --- Open PR ---
+    PR_JSON=$(jq -n \
+        --arg title "${AGENT}: research session ${DATE}" \
+        --arg body "## Self-Directed Research
+
+Automated research session for ${AGENT} (${DOMAIN}).
+
+Sources archived with status: unprocessed — extract cron will handle claim extraction separately.
+
+Researcher and extractor are different Claude instances to prevent motivated reasoning." \
+        --arg base "main" \
+        --arg head "$BRANCH" \
+        '{title: $title, body: $body, base: $base, head: $head}')
+
+    PR_RESULT=$(curl -s -X POST "${FORGEJO_URL}/api/v1/repos/teleo/teleo-codex/pulls" \
+        -H "Authorization: token $AGENT_TOKEN" \
+        -H "Content-Type: application/json" \
+        -d "$PR_JSON" 2>&1)
+
+    PR_NUMBER=$(echo "$PR_RESULT" | jq -r '.number // "unknown"' 2>/dev/null || echo "unknown")
+    log "PR #${PR_NUMBER} opened for ${AGENT}'s research session"
+fi

 # --- Back to main ---
 git checkout main >> "$LOG" 2>&1
--- a/ops/self-directed-research.md
+++ b/ops/self-directed-research.md
@ -29,34 +29,34 @@ This mirrors our proposer-evaluator separation for claims, applied one layer ear
 │  Pull tweets     │     │  Read archives    │     │  Review claims  │
 │  Pick 1 task     │     │  Extract claims   │     │  Approve/reject │
 │  Archive sources │     │  Open PR          │     │  Merge          │
-│  Push to main    │     │                   │     │                 │
+│  Push branch+PR  │     │                   │     │                 │
 └─────────────────┘     └──────────────────┘     └─────────────────┘
 ```

-### Research Cron: `research-daily.sh`
+### Research Cron: `research-session.sh`

 **Schedule:** Once daily, staggered across agents to respect rate limits

 ```
-# Stagger: each agent gets a 30-min window
-0  2 * * * /opt/teleo-eval/research-daily.sh rio
-30 2 * * * /opt/teleo-eval/research-daily.sh clay
-0  3 * * * /opt/teleo-eval/research-daily.sh theseus
-30 3 * * * /opt/teleo-eval/research-daily.sh vida
-0  4 * * * /opt/teleo-eval/research-daily.sh astra
-30 4 * * * /opt/teleo-eval/research-daily.sh leo
+# Stagger: each agent gets a 90-min window, overnight PST (10pm-7am)
+0  22 * * * /opt/teleo-eval/research-session.sh rio
+30 23 * * * /opt/teleo-eval/research-session.sh clay
+0   1 * * * /opt/teleo-eval/research-session.sh theseus
+30  2 * * * /opt/teleo-eval/research-session.sh vida
+0   4 * * * /opt/teleo-eval/research-session.sh astra
+30  5 * * * /opt/teleo-eval/research-session.sh leo
 ```

-**Per agent, the research session:**
+**Per agent, the research session (~90 min):**

 1. Pull latest tweets from agent's network accounts (X API)
 2. Read the agent's beliefs, recent claims, open positions
 3. Claude prompt: "You are {agent}. Here are your latest tweets from {accounts}. Here is your current knowledge state. Pick ONE research direction that advances your domain understanding. Archive the most relevant sources with notes."
 4. Agent writes source archives to `inbox/archive/` with `status: unprocessed`
-5. Commit and push to main (source-only, no claims)
+5. Commit, push to branch, open PR (source-only, no claims)
 6. Extract cron picks them up within 5 minutes

-**Key constraint:** One Claude session per agent, ~20-30 minutes, Sonnet model. Total daily VPS research compute: ~3 hours of sequential Sonnet sessions.
+**Key constraint:** One Claude session per agent, ~90 minutes, Sonnet model. Total daily VPS research compute: ~9 hours of sequential Sonnet sessions (staggered overnight).

 ### Research Prompt Structure

@ -97,7 +97,7 @@ You are {agent}, a Teleo knowledge base agent specializing in {domain}.
 ### Capacity on Claude Max ($200/month)

 **VPS compute budget (all Sonnet):**
- Research cron: 6 agents × 30 min/day = 3 hr/day
+- Research cron: 6 agents × 90 min/day = 9 hr/day (overnight)
 - Extract cron: ~37 sources × 10 min = 6 hr one-time backlog, then ~1 hr/day steady-state
 - Eval pipeline: ~10 PRs/day × 15 min = 2.5 hr/day
 - **Total VPS:** ~12.5 hr/day Sonnet (steady state)