fix: wrap breaker calls in stage_loop to prevent permanent task death
Some checks are pending
CI / lint-and-test (push) Waiting to run

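A transient DB lock raised by breaker.record_failure() inside stage_loop's
except handler was itself unhandled, so it killed the asyncio coroutine
permanently — snapshot_cycle died Apr 18 and never recovered. All three
breaker call sites now have their own try/except, which logs a warning
("breaker write failed") instead of letting the exception escape.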

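Also includes an HTML injection fix for github_feedback review_text: a literal
"</details>" in the review text is now escaped so it cannot close the
collapsible section early.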

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
m3taversal 2026-04-20 12:37:28 +01:00
parent 83526bc90e
commit cde92d3db1
2 changed files with 16 additions and 5 deletions

View file

@@ -125,7 +125,8 @@ async def on_eval_complete(conn, forgejo_pr: int, *, outcome: str, review_text:
if outcome == "approved":
body = "**Evaluation: Approved**\n\nYour contribution passed automated review and is queued for merge."
if review_text:
body += f"\n\n<details>\n<summary>Review details</summary>\n\n{review_text[:3000]}\n\n</details>"
safe_text = review_text[:3000].replace("</details>", "&lt;/details&gt;")
body += f"\n\n<details>\n<summary>Review details</summary>\n\n{safe_text}\n\n</details>"
elif outcome == "rejected":
body = "**Evaluation: Changes Requested**\n\n"
if issues:
@@ -133,7 +134,8 @@ async def on_eval_complete(conn, forgejo_pr: int, *, outcome: str, review_text:
for issue in issues:
body += f"- {issue}\n"
if review_text:
body += f"\n<details>\n<summary>Full review</summary>\n\n{review_text[:3000]}\n\n</details>"
safe_text = review_text[:3000].replace("</details>", "&lt;/details&gt;")
body += f"\n<details>\n<summary>Full review</summary>\n\n{safe_text}\n\n</details>"
body += (
"\n\nThe pipeline will attempt automated fixes where possible. "
"If fixes fail, the PR will be closed — you're welcome to resubmit."

View file

@@ -47,12 +47,21 @@ async def stage_loop(name: str, interval: int, func, conn, breaker: CircuitBreak
workers = breaker.max_workers()
succeeded, failed = await func(conn, max_workers=workers)
if failed > 0 and succeeded == 0:
breaker.record_failure()
try:
breaker.record_failure()
except Exception:
logger.warning("Stage %s: breaker write failed", name)
elif succeeded > 0:
breaker.record_success()
try:
breaker.record_success()
except Exception:
logger.warning("Stage %s: breaker write failed", name)
except Exception:
logger.exception("Stage %s: unhandled error in cycle", name)
breaker.record_failure()
try:
breaker.record_failure()
except Exception:
logger.warning("Stage %s: breaker write failed", name)
# Wait for interval or shutdown, whichever comes first
try: